library(stringr)
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-32. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
library(ASSIGN)
library(data.table)
suppressMessages(require(Seurat))
suppressMessages(require(ggplot2))
suppressMessages(require(cowplot))
suppressMessages(require(scater))
suppressMessages(require(scran))
suppressMessages(require(BiocParallel))
suppressMessages(require(BiocNeighbors))
#generate 25 gene expression signature
setwd("/home/li/covid19/result01/total")
cell24_4<-read.table("24celllines_4patients_norm.txt")
write.csv(cell24_4,"cell24_4.csv")
cell24_4<-read.csv("cell24_4.csv",header=TRUE)
cell24_4<-(log2(data.frame(cell24_4,check.names = F, row.names=1)+1))
colnames(cell24_4)
## [1] "S5_A549_Mock_1" "S5_A549_Mock_2" "S5_A549_Mock_3"
## [4] "S6_A549.ACE2_Mock_1" "S6_A549.ACE2_Mock_2" "S6_A549.ACE2_Mock_3"
## [7] "S7_Calu3_Mock_1" "S7_Calu3_Mock_2" "S7_Calu3_Mock_3"
## [10] "S16_A549.ACE2_Mock_1" "S16_A549.ACE2_Mock_2" "S16_A549.ACE2_Mock_3"
## [13] "S15_Healthy_2" "S15_Healthy_1" "S5_A549_CoV.2_1"
## [16] "S5_A549_CoV.2_2" "S5_A549_CoV.2_3" "S6_A549.ACE2_CoV.2_1"
## [19] "S6_A549.ACE2_CoV.2_2" "S6_A549.ACE2_CoV.2_3" "S7_Calu3_CoV.2_1"
## [22] "S7_Calu3_CoV.2_2" "S7_Calu3_CoV.2_3" "S16_A549.ACE2_CoV.2_1"
## [25] "S16_A549.ACE2_CoV.2_2" "S16_A549.ACE2_CoV.2_3" "S15_COV2_2"
## [28] "S15_COV2_1"
#rownames(cell24_4)
plot(hclust(dist(t(cell24_4)),method="complete"))

cell24_4_filt<-(cell24_4[apply(cell24_4==0,1,mean)<0.1,])
pca<-prcomp(t(cell24_4_filt))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
points(pca$x[1:14,1],pca$x[1:14,2],col=2,pch=2)
points(pca$x[15:28,1],pca$x[15:28,2],col=3,pch=2)
}

which(pca$x[,1]< -100)
## named integer(0)
which(pca$x[,1]< -20)
## S5_A549_Mock_1 S5_A549_Mock_2 S5_A549_Mock_3
## 1 2 3
## S6_A549.ACE2_Mock_1 S6_A549.ACE2_Mock_2 S6_A549.ACE2_Mock_3
## 4 5 6
## S7_Calu3_Mock_1 S7_Calu3_Mock_2 S7_Calu3_Mock_3
## 7 8 9
## S16_A549.ACE2_Mock_1 S16_A549.ACE2_Mock_2 S16_A549.ACE2_Mock_3
## 10 11 12
## S5_A549_CoV.2_1 S5_A549_CoV.2_2 S5_A549_CoV.2_3
## 15 16 17
## S6_A549.ACE2_CoV.2_1 S6_A549.ACE2_CoV.2_2 S6_A549.ACE2_CoV.2_3
## 18 19 20
## S7_Calu3_CoV.2_3 S16_A549.ACE2_CoV.2_1 S16_A549.ACE2_CoV.2_2
## 23 24 25
## S16_A549.ACE2_CoV.2_3
## 26
which(pca$x[,1]< 0)
## S5_A549_Mock_1 S5_A549_Mock_2 S5_A549_Mock_3
## 1 2 3
## S6_A549.ACE2_Mock_1 S6_A549.ACE2_Mock_2 S6_A549.ACE2_Mock_3
## 4 5 6
## S7_Calu3_Mock_1 S7_Calu3_Mock_2 S7_Calu3_Mock_3
## 7 8 9
## S16_A549.ACE2_Mock_1 S16_A549.ACE2_Mock_2 S16_A549.ACE2_Mock_3
## 10 11 12
## S5_A549_CoV.2_1 S5_A549_CoV.2_2 S5_A549_CoV.2_3
## 15 16 17
## S6_A549.ACE2_CoV.2_1 S6_A549.ACE2_CoV.2_2 S6_A549.ACE2_CoV.2_3
## 18 19 20
## S7_Calu3_CoV.2_1 S7_Calu3_CoV.2_2 S7_Calu3_CoV.2_3
## 21 22 23
## S16_A549.ACE2_CoV.2_1 S16_A549.ACE2_CoV.2_2 S16_A549.ACE2_CoV.2_3
## 24 25 26
bat <-
as.data.frame(cbind(
colnames(cell24_4_filt),
c(
rep(1, 3),
rep(2, 3),
rep(3, 3),
rep(4, 3),
rep(5, 2),
rep(1, 3),
rep(2, 3),
rep(3, 3),
rep(4, 3),
rep(5, 2)
),
c(rep(1, 14), rep(2, 14))
))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_cell24_4<-ComBat(dat = as.matrix(cell24_4_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found5batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
##PCA post combat
pca<-prcomp(t(combat_cell24_4))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
points(pca$x[1:14,1],pca$x[1:14,2], main="Top 2 PCs",col=2)
points(pca$x[15:28,1],pca$x[15:28,2], main="Top 2 PCs",col=3)
}

which(pca$x[,1]< -50)
## S7_Calu3_Mock_2 S16_A549.ACE2_Mock_1 S16_A549.ACE2_Mock_3
## 8 10 12
## S15_COV2_1
## 28
which(pca$x[,2]< -50)
## S15_Healthy_2
## 13
c_mock<-as.matrix(combat_cell24_4[,c(1:12)])
c_cov<-as.matrix(combat_cell24_4[,c(15:26)])
test<-as.matrix(combat_cell24_4[,c(13:14,27:28)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
trainingData = cbind(c_mock, c_cov),
testData = test,
trainingLabel = trainingLabela,
geneList = NULL,
n_sigGene = 25,
adaptive_B = T,
adaptive_S = F,
outputDir = sub_dir,
p_beta = 0.01,
theta0 = 0.05,
theta1 = 0.9,
iter = 2000,
burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Gene selection on cov pathway...
## | 0% 50% 100% |
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Outputing results...
#use series 15 to test positive
cell5_6_7_16_15<-read.table("56716_15positive.txt",header=TRUE)
write.csv(cell5_6_7_16_15,"cell5_6_7_16_15.csv")
mock_cov<-read.csv("cell5_6_7_16_15.csv", header = TRUE)
mock_cov<-(log2(data.frame(mock_cov,check.names = F, row.names=1)+1))
mock_cov_filt<-(mock_cov[apply(mock_cov==0,1,mean)<0.1,])
bat <-as.data.frame(cbind(
colnames(cell5_6_7_16_15),
c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 2),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 2)),
c(rep(1, 14), rep(2, 14))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(mock_cov_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found5batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(15:26)])
test<-as.matrix(combat_mock_cov[,c(13:14,27:28)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_positive", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
trainingData = cbind(c_mock, c_cov),
testData = test,
trainingLabel = trainingLabela,
geneList = list(genelist_25$X),
adaptive_B = T,
adaptive_S = F,
outputDir = sub_dir,
p_beta = 0.01,
theta0 = 0.05,
theta1 = 0.9,
iter = 2000,
burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Outputing results...
#use series 2 to test negative
cell5_6_7_16_2<-read.table("56716_2negative.txt",header=TRUE)
write.csv(cell5_6_7_16_2,"cell5_6_7_16_2.csv")
mock_cov<-read.csv("cell5_6_7_16_2.csv", header = TRUE)
mock_cov<-(log2(data.frame(mock_cov,check.names = F, row.names=1)+1))
mock_cov_filt<-(mock_cov[apply(mock_cov==0,1,mean)<0.1,])
bat <-as.data.frame(cbind(
colnames(cell5_6_7_16_2),
c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3)),
c(rep(1, 15), rep(2, 15))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(mock_cov_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found5batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(16:27)])
test<-as.matrix(combat_mock_cov[,c(13:15,28:30)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_negative", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
trainingData = cbind(c_mock, c_cov),
testData = test,
trainingLabel = trainingLabela,
geneList = list(genelist_25$X),
adaptive_B = T,
adaptive_S = F,
outputDir = sub_dir,
p_beta = 0.01,
theta0 = 0.05,
theta1 = 0.9,
iter = 2000,
burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Outputing results...
# BALF
cell_5_6_7_16_BALF<-read.table("cell_5_6_7_16_BALF_norm.txt",head=TRUE)
write.csv(cell_5_6_7_16_BALF,"cell_5_6_7_16_BALF.csv")
cells_4_BALF<-read.csv("cell_5_6_7_16_BALF.csv",header=TRUE)
cells_4_BALF<-(log2(data.frame(cells_4_BALF,check.names = F, row.names=1)+1))
colnames(cells_4_BALF)
## [1] "Series5_A549_Mock_1" "Series5_A549_Mock_2"
## [3] "Series5_A549_Mock_3" "Series6_A549.ACE2_Mock_1"
## [5] "Series6_A549.ACE2_Mock_2" "Series6_A549.ACE2_Mock_3"
## [7] "Series7_Calu3_Mock_1" "Series7_Calu3_Mock_2"
## [9] "Series7_Calu3_Mock_3" "Series16_A549.ACE2_Mock_1"
## [11] "Series16_A549.ACE2_Mock_2" "Series16_A549.ACE2_Mock_3"
## [13] "SRR10571724" "SRR10571730"
## [15] "SRR10571732" "Series5_A549_SARS.CoV.2_1"
## [17] "Series5_A549_SARS.CoV.2_2" "Series5_A549_SARS.CoV.2_3"
## [19] "Series6_A549.ACE2_SARS.CoV.2_1" "Series6_A549.ACE2_SARS.CoV.2_2"
## [21] "Series6_A549.ACE2_SARS.CoV.2_3" "Series7_Calu3_SARS.CoV.2_1"
## [23] "Series7_Calu3_SARS.CoV.2_2" "Series7_Calu3_SARS.CoV.2_3"
## [25] "Series16_A549.ACE2_SARS.CoV.2_1" "Series16_A549.ACE2_SARS.CoV.2_2"
## [27] "Series16_A549.ACE2_SARS.CoV.2_3" "CRR119894"
## [29] "CRR119895" "CRR119896"
## [31] "CRR119897"
#rownames(cells_4_BALF)
plot(hclust(dist(t(cells_4_BALF)),method="complete"))##samples are rather clustering by type than the COV2 infection status

#Series 15 is furthest from the others and that makes sense since these are from patients rather than cell lines. We should exclude Series 15 from the signature generation dataset.
##filter all zeroes
cells_4_BALF_filt<-(cells_4_BALF[apply(cells_4_BALF==0,1,mean)<0.1,])
#precombat PCA
pca<-prcomp(t(cells_4_BALF_filt))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
points(pca$x[1:15,1],pca$x[1:15,2],col=2,pch=2)
points(pca$x[16:31,1],pca$x[16:31,2],col=3,pch=2)
}

which(pca$x[,1]< -100)
## named integer(0)
which(pca$x[,1]< -20)
## Series5_A549_Mock_1 Series5_A549_Mock_2
## 1 2
## Series5_A549_Mock_3 Series6_A549.ACE2_Mock_1
## 3 4
## Series6_A549.ACE2_Mock_2 Series6_A549.ACE2_Mock_3
## 5 6
## Series7_Calu3_Mock_1 Series7_Calu3_Mock_2
## 7 8
## Series7_Calu3_Mock_3 Series16_A549.ACE2_Mock_1
## 9 10
## Series16_A549.ACE2_Mock_2 Series16_A549.ACE2_Mock_3
## 11 12
## Series5_A549_SARS.CoV.2_1 Series5_A549_SARS.CoV.2_2
## 16 17
## Series5_A549_SARS.CoV.2_3 Series6_A549.ACE2_SARS.CoV.2_1
## 18 19
## Series6_A549.ACE2_SARS.CoV.2_2 Series6_A549.ACE2_SARS.CoV.2_3
## 20 21
## Series7_Calu3_SARS.CoV.2_1 Series7_Calu3_SARS.CoV.2_2
## 22 23
## Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1
## 24 25
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3
## 26 27
which(pca$x[,1]< 0)
## Series5_A549_Mock_1 Series5_A549_Mock_2
## 1 2
## Series5_A549_Mock_3 Series6_A549.ACE2_Mock_1
## 3 4
## Series6_A549.ACE2_Mock_2 Series6_A549.ACE2_Mock_3
## 5 6
## Series7_Calu3_Mock_1 Series7_Calu3_Mock_2
## 7 8
## Series7_Calu3_Mock_3 Series16_A549.ACE2_Mock_1
## 9 10
## Series16_A549.ACE2_Mock_2 Series16_A549.ACE2_Mock_3
## 11 12
## Series5_A549_SARS.CoV.2_1 Series5_A549_SARS.CoV.2_2
## 16 17
## Series5_A549_SARS.CoV.2_3 Series6_A549.ACE2_SARS.CoV.2_1
## 18 19
## Series6_A549.ACE2_SARS.CoV.2_2 Series6_A549.ACE2_SARS.CoV.2_3
## 20 21
## Series7_Calu3_SARS.CoV.2_1 Series7_Calu3_SARS.CoV.2_2
## 22 23
## Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1
## 24 25
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3
## 26 27
###we need to batch adjust based on the cell types
##Series5_A549-1 Series6_A549.ACE2-2 Series7_Calu3-3 Series16_A549.ACE2-4, patients-5
bat <-as.data.frame(cbind(
colnames(cells_4_BALF_filt),
c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(6, 2), rep(7, 2)),
c(rep(1, 15), rep(2, 16))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(cells_4_BALF_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found7batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
write.csv(combat_mock_cov,"combat_cell56716_BALF.csv")
plot(hclust(dist(t(combat_mock_cov)),method="complete"))

##PCA post combat
pca<-prcomp(t(combat_mock_cov))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
points(pca$x[1:15,1],pca$x[1:15,2], main="Top 2 PCs",col=2)
points(pca$x[16:31,1],pca$x[16:31,2], main="Top 2 PCs",col=3)}

which(pca$x[,1]< -50)
## named integer(0)
which(pca$x[,2]< -50)
## named integer(0)
########running assign with the best 25 genes found in the cell line data########
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(16:27)])
test<-as.matrix(combat_mock_cov[,c(13:15,28:31)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_BALF", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
trainingData = cbind(c_mock, c_cov),
testData = test,
trainingLabel = trainingLabela,
geneList = list(genelist_25$X),
adaptive_B = T,
adaptive_S = F,
outputDir = sub_dir,
p_beta = 0.01,
theta0 = 0.05,
theta1 = 0.9,
iter = 2000,
burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Outputing results...
#PBMC
cell_5_6_7_16_PBMC<-read.table("cell_5_6_7_16_PBMC_norm.txt",head=TRUE)
write.csv(cell_5_6_7_16_PBMC,"cell_5_6_7_16_PBMC.csv")
cells_4_PBMC<-read.csv("cell_5_6_7_16_PBMC.csv",header=TRUE)
cells_4_PBMC<-(log2(data.frame(cells_4_PBMC,check.names = F, row.names=1)+1))
colnames(cells_4_PBMC)
## [1] "Series5_A549_Mock_1" "Series5_A549_Mock_2"
## [3] "Series5_A549_Mock_3" "Series6_A549.ACE2_Mock_1"
## [5] "Series6_A549.ACE2_Mock_2" "Series6_A549.ACE2_Mock_3"
## [7] "Series7_Calu3_Mock_1" "Series7_Calu3_Mock_2"
## [9] "Series7_Calu3_Mock_3" "Series16_A549.ACE2_Mock_1"
## [11] "Series16_A549.ACE2_Mock_2" "Series16_A549.ACE2_Mock_3"
## [13] "CRR119890" "CRR125445"
## [15] "CRR125446" "Series5_A549_SARS.CoV.2_1"
## [17] "Series5_A549_SARS.CoV.2_2" "Series5_A549_SARS.CoV.2_3"
## [19] "Series6_A549.ACE2_SARS.CoV.2_1" "Series6_A549.ACE2_SARS.CoV.2_2"
## [21] "Series6_A549.ACE2_SARS.CoV.2_3" "Series7_Calu3_SARS.CoV.2_1"
## [23] "Series7_Calu3_SARS.CoV.2_2" "Series7_Calu3_SARS.CoV.2_3"
## [25] "Series16_A549.ACE2_SARS.CoV.2_1" "Series16_A549.ACE2_SARS.CoV.2_2"
## [27] "Series16_A549.ACE2_SARS.CoV.2_3" "CRR119891"
## [29] "CRR119892" "CRR119893"
#rownames(cells_4_PBMC)
plot(hclust(dist(t(cells_4_PBMC)),method="complete"))##samples are rather clustering by type than the COV2 infection status

#Series 15 is furthest from the others and that makes sense since these are from patients rather than cell lines. We should exclude Series 15 from the signature generation dataset.
##filter all zeroes
cells_4_PBMC_filt<-(cells_4_PBMC[apply(cells_4_PBMC==0,1,mean)<0.1,])
#precombat PCA
pca<-prcomp(t(cells_4_PBMC_filt))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
points(pca$x[1:15,1],pca$x[1:15,2],col=2,pch=2)
points(pca$x[16:30,1],pca$x[16:30,2],col=3,pch=2)
}

which(pca$x[,1]< -100)
## named integer(0)
which(pca$x[,1]< -20)
## Series5_A549_Mock_1 Series5_A549_Mock_2
## 1 2
## Series5_A549_Mock_3 Series6_A549.ACE2_Mock_1
## 3 4
## Series6_A549.ACE2_Mock_2 Series6_A549.ACE2_Mock_3
## 5 6
## Series7_Calu3_Mock_1 Series7_Calu3_Mock_2
## 7 8
## Series7_Calu3_Mock_3 Series16_A549.ACE2_Mock_1
## 9 10
## Series16_A549.ACE2_Mock_2 Series16_A549.ACE2_Mock_3
## 11 12
## Series5_A549_SARS.CoV.2_1 Series5_A549_SARS.CoV.2_2
## 16 17
## Series5_A549_SARS.CoV.2_3 Series6_A549.ACE2_SARS.CoV.2_1
## 18 19
## Series6_A549.ACE2_SARS.CoV.2_2 Series6_A549.ACE2_SARS.CoV.2_3
## 20 21
## Series7_Calu3_SARS.CoV.2_1 Series7_Calu3_SARS.CoV.2_2
## 22 23
## Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1
## 24 25
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3
## 26 27
which(pca$x[,1]< 0)
## Series5_A549_Mock_1 Series5_A549_Mock_2
## 1 2
## Series5_A549_Mock_3 Series6_A549.ACE2_Mock_1
## 3 4
## Series6_A549.ACE2_Mock_2 Series6_A549.ACE2_Mock_3
## 5 6
## Series7_Calu3_Mock_1 Series7_Calu3_Mock_2
## 7 8
## Series7_Calu3_Mock_3 Series16_A549.ACE2_Mock_1
## 9 10
## Series16_A549.ACE2_Mock_2 Series16_A549.ACE2_Mock_3
## 11 12
## Series5_A549_SARS.CoV.2_1 Series5_A549_SARS.CoV.2_2
## 16 17
## Series5_A549_SARS.CoV.2_3 Series6_A549.ACE2_SARS.CoV.2_1
## 18 19
## Series6_A549.ACE2_SARS.CoV.2_2 Series6_A549.ACE2_SARS.CoV.2_3
## 20 21
## Series7_Calu3_SARS.CoV.2_1 Series7_Calu3_SARS.CoV.2_2
## 22 23
## Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1
## 24 25
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3
## 26 27
###we need to batch adjust based on the cell types
##Series5_A549-1 Series6_A549.ACE2-2 Series7_Calu3-3 Series16_A549.ACE2-4, patients-5
bat <-as.data.frame(cbind(
colnames(cells_4_PBMC_filt),
c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(6, 3)),
c(rep(1, 15), rep(2, 15))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(cells_4_PBMC_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found6batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
##PCA post combat
pca<-prcomp(t(combat_mock_cov))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
points(pca$x[1:15,1],pca$x[1:15,2], main="Top 2 PCs",col=2)
points(pca$x[16:30,1],pca$x[16:30,2], main="Top 2 PCs",col=3)}

which(pca$x[,1]< -50)
## named integer(0)
which(pca$x[,2]< -50)
## named integer(0)
plot(hclust(dist(t(combat_mock_cov)),method="complete"))

########running assign with the best 25 genes found in the cell line data########
#combat_mock_cov<-read.table("combat_cell56716_PBMC.txt",header=TRUE)
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(16:27)])
test<-as.matrix(combat_mock_cov[,c(13:15,28:30)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_PBMC", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
trainingData = cbind(c_mock, c_cov),
testData = test,
trainingLabel = trainingLabela,
geneList = list(genelist_25$X),
n_sigGene = 25,
adaptive_B = T,
adaptive_S = F,
outputDir = sub_dir,
p_beta = 0.01,
theta0 = 0.05,
theta1 = 0.9,
iter = 2000,
burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0% 50% 100% |
## Outputing results...
#single_cell_integrate
#input, CreateSeuratObject, filter, save
a<-"hc_51_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "h1", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 15385 features across 11113 samples within 1 assay
## Active assay: RNA (15385 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: IL32, CD2, TRAC, CCL5, LTB, TRBC2, CORO1A, TRBC1, KLRB1, CD3D
## CD3E, IFITM1, GZMA, CLEC2D, ACAP1, LIMD2, GIMAP7, LINC01943, LCK, TRAF3IP3
## PTPN7, LINC01871, CD3G, IFITM2, ISG20, NKG7, ETS1, CD48, BTG1, CD40LG
## Negative: MARCO, C1QB, C1QA, GRN, FTL, C1QC, TYROBP, HLA-DRB5, FCER1G, LGALS3
## HLA-DRB1, CD68, CD63, CTSD, SERPINA1, HLA-DQA1, HLA-DRA, FBP1, HLA-DQB1, TSPO
## ACP5, S100A11, ANXA2, CTSS, SERPING1, MGST3, PSAP, FABP4, CAPG, S100A9
## PC_ 2
## Positive: APOE, APOC1, FTL, LYZ, CFD, NUPR1, CTSS, PLTP, SAT1, GPNMB
## PSAP, LIPA, SOD2, HLA-DPA1, NPC2, HLA-DPB1, LINC01827, CD14, HLA-DMB, TYROBP
## PLD3, RARRES1, HLA-DRA, KCNMA1, A2M, ABCA1, BLVRB, AKR1B1, CPVL, GPX3
## Negative: TOP2A, BIRC5, CDK1, PCLAF, UBE2C, NUSAP1, MKI67, TYMS, CDKN3, HMMR
## PTTG1, GTSE1, CDC20, CCNB2, ASPM, CENPF, PRC1, MAD2L1, SPC25, TK1
## RRM2, CENPM, CEP55, TPX2, CKS2, ANLN, SGO1, CCNB1, CENPA, NUF2
## PC_ 3
## Positive: MCEMP1, CD52, INHBA, RGCC, S100A4, MRC1, RETN, GPD1, AQP3, CRIP1
## PNPLA6, CALM1, MYL12A, STXBP2, FBP1, IGF1, ISG15, CCND3, HCAR2, LY6E
## MYL6, THBS1, RHBDD2, PTPMT1, PPARG, ALOX5AP, GLRX, DECR1, TSPO, LINC02345
## Negative: LYZ, APOE, TOP2A, BIRC5, CDK1, UBE2C, NUSAP1, PCLAF, MKI67, HMMR
## CDKN3, GTSE1, PLTP, CDC20, TYMS, CENPF, CCNB2, PRC1, APOC1, ASPM
## CFD, MAD2L1, MARCKS, CXCR4, NUPR1, CTSS, GPNMB, CD14, NPC2, CEP55
## PC_ 4
## Positive: PKIB, CD1C, FCGR2B, FCER1A, C15orf48, MARCKS, ACTB, LGALS2, CLEC10A, CD1E
## ACTG1, MEF2C, BASP1, GSN, EMP3, FPR3, EMP1, S100A10, LMNA, FGL2
## CCL17, RNASE6, PFN1, RNASE1, CD1A, CRIP1, MYADM, PEA15, TAGLN2, AP1S2
## Negative: CSTA, CFD, FABP4, LYZ, NUPR1, FTL, SERPING1, IGF1, CTSC, APOC1
## VMO1, PLAC8, RBP4, ALDH2, LINC01827, C1QA, STMN1, AKR1B1, C1QB, SPARC
## NUP214, MYB, C9orf16, LY86, KCNAB1, ABHD5, SERPINA1, MT2A, MARCO, LINC02154
## PC_ 5
## Positive: OTOA, CCL18, CTSL, GCHFR, TFRC, IL32, FDX1, GPNMB, CSTB, SDC2
## LIPA, SCD, CD2, RMDN3, HES2, APOC1, CA2, CCL5, TRAC, FABP3
## CD52, CD36, LGMN, CYP27A1, NR1H3, CD99, PLIN2, FN1, TRBC2, CTSB
## Negative: FCER1A, CD1C, PKIB, CD1E, MNDA, LGALS2, CLEC10A, MEF2C, HLA-DPB1, FCGR2B
## FGL2, MS4A6A, HLA-DPA1, PLAC8, HLA-DQB1, HLA-DRA, IFITM3, CCL17, CD1A, HLA-DRB1
## IGF1, THBS1, HLA-DQA1, CSTA, CLEC7A, LYZ, HLA-DRB5, CLEC5A, TYROBP, S100B
pbmc <- RunUMAP(pbmc, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 06:26:44 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:26:44 Read 8923 rows and found 10 numeric columns
## 06:26:44 Using Annoy for neighbor search, n_neighbors = 30
## 06:26:44 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:26:47 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b14e3782a3
## 06:26:47 Searching Annoy index using 1 thread, search_k = 3000
## 06:26:50 Annoy recall = 100%
## 06:26:51 Commencing smooth kNN distance calibration using 1 thread
## 06:26:53 Initializing from normalized Laplacian + noise
## 06:26:54 Commencing optimization for 500 epochs, with 365236 positive edges
## 06:27:16 Optimization finished
saveRDS(pbmc, file = "hc51.rds")
a<-"hc_52_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "h2", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 14748 features across 10361 samples within 1 assay
## Active assay: RNA (14748 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: C1QB, C1QC, ANXA2, S100A11, CD52, ACTB, CFL1, MYL6, C1QA, CD63
## SERF2, FBP1, RETN, TFRC, CSTB, GRN, NOP10, PFN1, S100A10, MCEMP1
## TSPO, PRDX1, TMSB10, IFI6, IGFBP2, MARCO, TGM2, HSP90AA1, LGALS3, MSR1
## Negative: MALAT1, LYZ, CXCR4, CFD, BTG1, C3, RGS2, MT2A, SNHG7, MEF2C
## IL32, LINC01827, EGR1, LTB, RGS18, CD2, CORO1A, FOS, CLDND2, SNHG12
## TRAC, LGALS2, RCSD1, CD1D, CLEC4E, GPR155, LTC4S, MT1X, WFDC2, AGR3
## PC_ 2
## Positive: FTL, APOE, APOC1, GPNMB, LGALS1, OTOA, CSTB, LIPA, CTSD, PSAP
## NPC2, RARRES1, PLA2G7, PLD3, FTH1, MARCKS, A2M, CD36, CTSB, HLA-A
## BLVRB, CD14, CD48, HLA-B, CFD, SGK1, LGMN, ABCA1, SCD, SDC2
## Negative: MCEMP1, RETN, INHBA, ALOX5AP, C1QA, PLAC8, MRC1, S100A4, CRIP1, STXBP2
## HP, IGF1, C1QB, TSPO, TCF7L2, FBP1, PNPLA6, RGCC, CD52, IGFBP2
## C1QC, TMSB4X, PARAL1, MYL6, S100A9, CYSLTR1, LY6E, HLA-DRA, CYBB, FOLR3
## PC_ 3
## Positive: TMSB4X, TYROBP, FTH1, FTL, CD74, LYZ, HLA-DRA, APOC1, APOE, CFD
## C1QB, B2M, HLA-DQA1, C1QA, GRN, CTSD, NUPR1, HCST, PSAP, SRGN
## FABP4, LGALS1, ALOX5AP, NPC2, TMSB10, VIM, S100A4, SERPING1, MARCO, CD68
## Negative: SLPI, GSTA1, SMIM22, TSPAN1, AGR3, CD24, FXYD3, TFF3, C9orf24, IGFBP7
## MS4A8, CAPS, WFDC2, PERP, ANKRD44-AS1, RSPH1, CLU, FAM183A, KRT8, LRRIQ1
## TMEM190, SNTN, NQO1, C1orf194, SPATA18, DYNLRB2, ECRG4, C20orf85, C12orf75, C11orf88
## PC_ 4
## Positive: TYROBP, FTL, APOC1, FTH1, LYZ, CD74, HLA-DRA, CFD, FABP4, C1QB
## NUPR1, TMSB4X, B2M, C1QA, APOE, RBP4, HLA-DQA1, SERPING1, MARCO, MT2A
## CES1, LINC01827, TREM1, GRN, HCST, GCHFR, NPC2, PSAP, HLA-B, GLUL
## Negative: MARCKS, PLA2G7, EMP1, SDS, CCL4, SPP1, C15orf48, SNCA, CHI3L1, ACTG1
## CCL3, CCL4L2, ACTB, CCL2, CHIT1, FCGR2B, CORO1A, FNIP2, LGMN, RGCC
## MALAT1, MTHFD2, INHBA, CCL20, TNFAIP6, GBP1, SLAMF7, GDF15, SOD2, TNIP3
## PC_ 5
## Positive: LGALS3, CD52, CSTB, CD9, ACP5, FDX1, GCHFR, HES2, GDF15, OTOA
## PPDPF, PLA2G7, CD63, CA2, CHIT1, RGCC, TUBA1B, GPD1, FABP3, FTL
## HEXB, CD68, RBP4, TXN, RETREG1, CHCHD6, DBI, CYP27A1, NOP10, HS3ST2
## Negative: CCL4, CCL4L2, LYZ, TNFAIP6, HLA-DRA, CCL3, CXCL10, CD74, TNIP3, CCL20
## SOD2, CCL3L1, TMSB4X, FCGR3A, SRGN, GBP1, S100A8, SAT1, MARCKS, GCH1
## GBP5, PLEK, FOS, MT2A, B2M, MT-CO1, CXCL3, VAMP5, EGR1, CXCL11
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:30:49 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:30:49 Read 7533 rows and found 10 numeric columns
## 06:30:49 Using Annoy for neighbor search, n_neighbors = 30
## 06:30:49 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:30:51 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b163c8aac1
## 06:30:51 Searching Annoy index using 1 thread, search_k = 3000
## 06:30:53 Annoy recall = 100%
## 06:30:54 Commencing smooth kNN distance calibration using 1 thread
## 06:30:57 Initializing from normalized Laplacian + noise
## 06:30:57 Commencing optimization for 500 epochs, with 307648 positive edges
## 06:31:17 Optimization finished
saveRDS(pbmc, file = "hc52.rds")
a<-"hc_100_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "h3", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 16315 features across 6578 samples within 1 assay
## Active assay: RNA (16315 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: KRT78, LCN2, KRT19, ELF3, MUC21, PRSS22, ECM1, MAL, SPRR1B, KRT13
## SPRR3, LYPD2, SPRR2D, TMPRSS11B, PITX1, PPL, SPRR2A, EMP1, CEACAM6, FAM25A
## GPRC5A, SCEL, RHCG, EPS8L1, CD3E, DUSP5, IL32, TMPRSS11E, SPINK5, CDKN2B
## Negative: FTL, HLA-DRA, LYZ, TYROBP, HLA-DRB1, APOC1, CD74, C1QA, CST3, FCER1G
## APOE, C1QB, HLA-DQB1, HLA-DPA1, CTSD, CD68, GRN, ACP5, FBP1, FTH1
## HLA-DPB1, PSAP, MARCO, TSPO, FCGRT, FABP4, C1QC, CD9, CEBPB, MCEMP1
## PC_ 2
## Positive: KRT78, LCN2, MUC21, ELF3, ECM1, KRT19, PRSS22, SPRR1B, KRT13, SPRR3
## LYPD2, SPRR2D, PITX1, EPS8L1, TMPRSS11B, EMP1, PPL, FTH1, S100A9, CEACAM6
## FAM25A, SPRR2A, GPRC5A, SCEL, NIBAN2, RHCG, TMPRSS11E, IL1RN, LGALS3, SPINK5
## Negative: CD3E, IL32, TRAC, CD2, CD3D, ETS1, CD3G, LTB, TRBC2, CORO1A
## GIMAP7, LIMD2, KLRB1, TRBC1, ISG20, IL7R, CD247, ACAP1, CCL5, RORA
## SYNE2, CD7, TRAF3IP3, GZMA, BCL11B, CD69, GPR183, MT-ND3, DGKA, RIPOR2
## PC_ 3
## Positive: SLC11A1, TNFAIP2, NEAT1, TFRC, TAGLN, ITGAX, VMP1, DDHD1, INHBA, LRP1
## ZNF589, DMXL2, MYO6, CHML, UVSSA, CIITA, SIGLEC1, ZMAT1, CTNNB1, PKN2
## B3GNT5, SLC12A6, PER1, ACE, FOS, SLFN11, PTK2B, CXCL3, DUSP1, CLN8
## Negative: ACTB, ACTG1, MYL12A, S100A6, S100A11, SSR4, HLA-DPB1, HCST, SRGN, CD63
## LGALS1, FCER1G, CST3, NPC2, HLA-DPA1, C1QB, S100A9, PPDPF, HLA-DQA2, CSTB
## PPIB, ZFAS1, SEC61B, MGST3, NDUFB11, SUB1, EVI2B, TYROBP, CORO1A, GCHFR
## PC_ 4
## Positive: SPINK5, PSCA, TMPRSS11E, KRT6A, SPRR2E, S100A14, LYPD2, SPRR3, KRT13, S100A9
## MAL, CNFN, FABP4, C19orf33, SPRR2A, SPRR2D, LCN2, KRT78, KRT19, SPRR1B
## PPL, S100A8, PRSS22, CD177, S100P, APOC1, AIF1L, ELF3, GCHFR, TGM3
## Negative: TAMALIN, SNCA, FPR3, MEF2C, BCL11A, MT-ND3, FCGR2B, MAP3K20, TRAF4, SNX9
## TCF4, P2RY14, CLTC, MS4A6A, MT-ND1, PMP22, IL13RA1, METRNL, F13A1, CLDN1
## STAB1, CSF1R, MT-ND5, PCBP1, AHNAK, SPP1, MT-ND2, CLEC4F, NLRP3, ZNF503
## PC_ 5
## Positive: FABP4, MARCO, MGST3, CTSD, MCEMP1, CES1, TRAF4, APOC1, PLIN2, APOE
## FRMD8, TSPO, GZMB, FABP5, SLC3A2, MSR1, CEBPB, NUPR1, FBXO32, GNG12
## FBP1, RETN, ZNF185, GNE, ATP1B1, CRYM, MGST1, SCD, CA13, SSR4
## Negative: FGL2, LGALS2, CLEC10A, FCER1A, CD1C, CD1E, CLEC4F, KRT6A, SPRR2E, NDRG2
## LTC4S, S100B, MS4A6A, F13A1, SPINK5, PSCA, CD1A, CFP, CLDN1, S100A14
## PKIB, FCGR2B, CCL17, HLA-DQA1, CSF1R, TMPRSS11E, CNFN, AFDN, HSPA6, FPR3
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:32:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:32:28 Read 303 rows and found 10 numeric columns
## 06:32:28 Using Annoy for neighbor search, n_neighbors = 30
## 06:32:28 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:32:28 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b140dd96e9
## 06:32:28 Searching Annoy index using 1 thread, search_k = 3000
## 06:32:28 Annoy recall = 100%
## 06:32:29 Commencing smooth kNN distance calibration using 1 thread
## 06:32:29 Initializing from normalized Laplacian + noise
## 06:32:29 Commencing optimization for 500 epochs, with 11370 positive edges
## 06:32:30 Optimization finished
saveRDS(pbmc, file = "hc100.rds")
a<-"mild_141_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "m1", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 17670 features across 4233 samples within 1 assay
## Active assay: RNA (17670 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CST3, FTL, IFI30, FTH1, TYROBP, CD68, FCER1G, CXCL10, AIF1, LYZ
## GLUL, MNDA, GRN, SERPINA1, HLA-DRA, CTSL, MS4A6A, SERPING1, SPI1, APOBEC3A
## MAFB, APOC1, MS4A4A, ATOX1, FCGR1A, C1QC, C1QB, NUPR1, HLA-DRB5, FCGR3A
## Negative: IL32, CCL5, CD52, GIMAP7, CD7, GZMA, CD8A, TRBC2, NKG7, PRF1
## CD8B, CST7, CTSW, SPOCK2, BTG1, LAG3, IL2RB, CXCR3, ITGA4, HOPX
## SH2D2A, GNG2, LBH, FASLG, CD6, AHNAK, CD69, CXCR6, LINC00861, GZMB
## PC_ 2
## Positive: WFDC2, SLPI, LCN2, KRT19, FXYD3, CXCL17, AGR2, PIGR, SCGB3A1, S100P
## TSPAN1, GSTA1, SCGB1A1, SERPINB3, ADH1C, CYP2F1, TFF3, BPIFB1, KRT18, PRSS23
## S100A14, CEACAM6, C19orf33, MUC5B, MUC5AC, LYPD2, MSMB, ASS1, AKR1C1, AQP5
## Negative: MX1, RSAD2, ISG20, IRF7, GBP1, KLF6, ISG15, STAT1, SRGN, DDX58
## IFIH1, GBP4, GBP5, MT-CO1, TNFSF10, IFIT3, STAT2, CD69, IFIT1, TYMP
## GIMAP7, CD38, NUB1, MT-CO2, S100A10, IL32, RGS1, SOCS1, APOBEC3G, IFIT2
## PC_ 3
## Positive: APOC1, NUPR1, MARCO, S100A4, VIM, FTL, FABP4, MCEMP1, APOE, C1QB
## C1QC, C1QA, RBP4, CAPG, S100A8, CYP27A1, STXBP2, PARAL1, SNX10, PELATON
## NCF2, GCHFR, AIF1, S100A11, S100A9, CES1, SERPINA1, MGST3, S100A10, PPARG
## Negative: CCR7, TSPAN13, IGKC, NR4A3, MEF2C, IDO1, BASP1, IRF8, TCF4, CD83
## LAD1, BCL11A, WDFY4, LRRK1, LAMP3, ETV6, FSCN1, NRP2, BIRC3, TBC1D8
## MARCKSL1, RGS1, NEURL3, DUSP5, CERS6, NAPSA, SLC41A2, NFKB1, CD79A, IGFBP4
## PC_ 4
## Positive: MS4A1, CD79A, IGKC, HLA-DQA1, MEF2C, BANK1, FCRLA, NAPSA, HLA-DQB1, CD19
## LINC02397, TCF4, HLA-DQA2, CPNE5, GAPT, PLD4, RHEX, HLA-DRA, JCHAIN, BLNK
## BCL11A, DERL3, HLA-DPB1, PLAC8, IGHA1, FCRL5, IGHM, IRF8, HLA-DPA1, PAX5
## Negative: S100A6, WFDC2, S100P, LCN2, IFI6, SLPI, KRT19, RSAD2, AGR2, PIGR
## GBP1, KLF6, CEACAM6, CXCL17, LYPD2, CYP2F1, ADH1C, FXYD3, ASS1, SCGB3A1
## PRSS23, GBP5, S100A14, LAG3, TSPAN1, S100A4, MDK, S100A10, KRT18, C19orf33
## PC_ 5
## Positive: MT2A, TNFSF10, IFIT3, IFIT1, IFIT2, ISG15, IFI6, ICAM2, MX1, GBP1
## GBP4, RIPOR2, LTB, DDX58, STAT1, SAT1, ISG20, SELL, RSAD2, IRF7
## SOCS1, ICOS, CD28, IFIH1, TMSB10, MAL, CTLA4, TESPA1, TNFRSF4, BCL2L14
## Negative: HOPX, ZNF683, CD8A, KLRC1, CXCR6, CD63, KLRD1, ITGAE, PLEKHF1, CCL5
## NKG7, CTSW, GPR25, LINC02446, TRG-AS1, ITGA1, CD8B, CD244, FCRL6, AOAH
## CCL4, KIR2DL4, KLRC3, CST7, ID2, CD7, TRGV7, KIR3DL2, MATK, SLAMF7
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:32:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:32:39 Read 1366 rows and found 10 numeric columns
## 06:32:39 Using Annoy for neighbor search, n_neighbors = 30
## 06:32:39 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:32:39 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b1430d8258
## 06:32:39 Searching Annoy index using 1 thread, search_k = 3000
## 06:32:39 Annoy recall = 100%
## 06:32:40 Commencing smooth kNN distance calibration using 1 thread
## 06:32:40 Initializing from normalized Laplacian + noise
## 06:32:40 Commencing optimization for 500 epochs, with 50582 positive edges
## 06:32:42 Optimization finished
saveRDS(pbmc, file = "mild141.rds")
a<-"mild_142_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "m2", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 17512 features across 4559 samples within 1 assay
## Active assay: RNA (17512 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: FTL, FTH1, TYROBP, FCER1G, CST3, CD68, IFI30, AIF1, APOC1, GLUL
## GRN, SPI1, MNDA, S100A11, NUPR1, HLA-DRA, CTSL, CXCL10, C1QA, C1QC
## APOE, MSR1, MS4A4A, LYZ, C1QB, HLA-DQA1, SERPINA1, LGALS3, S100A9, HLA-DQB1
## Negative: IL32, CCL5, CD7, IFITM1, GZMA, MALAT1, PRF1, CST7, NKG7, TRBC2
## CD8A, CD52, CD8B, CXCR3, CTSW, LAG3, IL2RB, SH2D2A, LINC01871, FASLG
## ISG20, LINC00861, GZMB, GZMH, SAMD3, CD69, ID2, TRBC1, CXCR6, GZMK
## PC_ 2
## Positive: MALAT1, RSAD2, NEAT1, MX1, PARP14, IFITM1, SIGLEC1, TNFSF10, ISG20, MARCKS
## DDX58, KLF6, IFIH1, ADA2, GBP1, CCL2, CCL8, PLA2G7, SDS, FNIP2
## RNASE1, SRGN, IRF7, LILRB2, TTYH3, FCN1, CCR1, LGMN, STAT2, VMP1
## Negative: FABP4, NUPR1, FTL, FTH1, APOC1, SLPI, ALDH2, TFF3, SCGB1A1, WFDC2
## CXCL17, MSMB, KRT19, LCN2, GSTA1, C1QA, SCGB3A1, SERPINB3, CYP2F1, C1QB
## BPIFB1, MCEMP1, MUC5B, FBP1, S100A9, CYB5A, ALDH1A1, AGR2, PRDX1, CCL18
## PC_ 3
## Positive: KRT19, LCN2, SLPI, CXCL17, S100P, S100A14, TFF3, SCGB1A1, MSMB, WFDC2
## CYP2F1, SCGB3A1, SERPINB3, MUC5B, BPIFB1, LYPD2, AGR2, C19orf33, PIGR, KRT7
## GSTA1, AKR1C1, PRSS23, FXYD3, MSLN, STEAP4, ADH1C, ADIRF, RAB25, ELF3
## Negative: VIM, APOC1, FTL, FABP4, NUPR1, C1QA, C1QB, ACTB, SPI1, C1QC
## LGALS1, FTH1, AIF1, CAPG, C1orf162, MARCO, MCEMP1, TYROBP, CCL18, CXCL16
## SNX10, FABP5, GLRX, FBP1, CD52, BCL2A1, ALDH2, APOE, NCF2, CD68
## PC_ 4
## Positive: SCGB1A1, WFDC2, TFF3, SCGB3A1, MSMB, CYP2F1, MUC5B, BPIFB1, SLPI, CXCL17
## SERPINB3, AGR2, PIGR, AKR1C1, PRSS23, GSTA1, ADH1C, FXYD3, MUC5AC, C3
## TSPAN8, LY6D, TMEM45A, CREB3L1, TSPAN1, SERPINB11, AQP5, PART1, KRT8, KRT18
## Negative: NCCRP1, KRT16, SPRR3, SDCBP2, KRT13, ECM1, NECTIN4, PRSS27, RND3, SPNS2
## TMPRSS11E, KRT78, RNF39, TGM3, HSPB8, CRISP3, TMPRSS11B, SCEL, MUC21, CYSRT1
## FAM25A, AIF1L, GPRC5A, PADI1, EPS8L1, NR1D1, GRHL1, TRIP10, ATG9B, PPL
## PC_ 5
## Positive: ZNF683, CCL5, CCL4, NKG7, CD63, CD8A, GZMH, CTSW, ITGA1, LINC02446
## CD7, CXCR6, KLRD1, CD8B, KLRC1, PLEKHF1, ITGAE, PRF1, CCL4L2, GPR25
## GZMA, HOPX, TRGV2, FCRL6, CST7, GZMB, CD244, IL2RB, MATK, CTSD
## Negative: BCL11A, IGKC, CCR7, SELL, PLD4, JCHAIN, TCF4, MS4A1, CD79A, BANK1
## SPIB, LINC02397, PLAC8, IGHM, NAPSA, CPNE5, TNFSF10, COBLL1, TSPAN13, P2RY14
## SCT, CD24, RHEX, MEF2C, LTB, EAF2, IRF8, IFIT3, CD19, RIPOR2
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:33:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:33:01 Read 1694 rows and found 10 numeric columns
## 06:33:01 Using Annoy for neighbor search, n_neighbors = 30
## 06:33:01 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:33:01 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b16718c470
## 06:33:01 Searching Annoy index using 1 thread, search_k = 3000
## 06:33:01 Annoy recall = 100%
## 06:33:01 Commencing smooth kNN distance calibration using 1 thread
## 06:33:02 Initializing from normalized Laplacian + noise
## 06:33:02 Commencing optimization for 500 epochs, with 63580 positive edges
## 06:33:04 Optimization finished
saveRDS(pbmc, file = "mild142.rds")
a<-"mild_144_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "m3", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 15374 features across 913 samples within 1 assay
## Active assay: RNA (15374 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CTSL, SGK1, APOBEC3A, LILRB2, IL4I1, SIGLEC1, OLR1, LILRB3, C15orf48, SDS
## IL1RN, SLC11A1, PLAUR, FN1, SLC43A2, NEAT1, LGMN, EMP1, CTSD, CMKLR1
## SERPINA1, SIRPA, CCL2, FCN1, GRINA, LILRB4, DEFB1, CD83, CCL8, DUSP1
## Negative: SCGB3A1, MSMB, SLPI, SCGB1A1, WFDC2, TFF3, CXCL17, KRT19, BPIFB1, CCL5
## MUC5B, AGR2, SERPINB3, PIGR, MT1E, LCN2, S100P, KRT18, CYP2F1, AKR1C1
## GSTA1, HOPX, NKG7, CD2, C19orf33, MUC5AC, LYPD2, ZNF683, CXCL6, PSCA
## PC_ 2
## Positive: CST3, FTH1, HLA-DRA, IFI27, FTL, HLA-DQA1, LYZ, TYROBP, SLPI, AIF1
## C1QC, CD74, HLA-DQB1, SCGB1A1, C1QB, WFDC2, MSMB, C1QA, TFF3, SPI1
## CXCL17, HLA-DRB5, NPC2, HLA-DPA1, SCGB3A1, KRT19, BPIFB1, HLA-DPB1, FCER1G, VMO1
## Negative: CCL5, CD7, PRF1, MALAT1, CD2, NKG7, CD3G, CTSW, GZMB, LAG3
## GZMA, CD8A, KLRC1, AHNAK, ALOX5AP, CST7, CD8B, CLEC2D, PTPN22, IFITM1
## CXCR6, SLFN5, GZMH, CBLB, SH2D2A, IKZF3, SPOCK2, CCL4, CD44, ITGAE
## PC_ 3
## Positive: DRC3, DNAH9, LDLRAD1, CCDC189, CCDC33, C4orf47, FAM166B, CCDC78, DNAH10, TSNAXIP1
## WDR38, SPAG17, RSPH1, ELF3, CCDC170, DNALI1, CAPS, DNAH12, CCDC173, CFAP43
## LRRC23, CFAP157, CCDC191, EPHX2, RPGRIP1L, AGBL4, EFHC1, MAP3K19, TXLNB, DNAI2
## Negative: FTL, HLA-DPB1, HLA-DPA1, TYROBP, AIF1, HLA-DRB1, HLA-DQB1, C1QC, C1QA, SPI1
## C1QB, HLA-DQA1, FCER1G, HLA-DRB5, TUBA1B, MS4A6A, HLA-DRA, CST3, IFI30, NPC2
## CALHM6, CD68, LYZ, LGALS1, MNDA, LGALS2, VIM, CD74, PSAP, FGL2
## PC_ 4
## Positive: HLA-DPB1, HLA-DPA1, C1QC, HLA-DQA1, C1QA, C1QB, HLA-DRA, AIF1, SPI1, TYROBP
## HLA-DQB1, HLA-DRB1, CALHM6, CD74, HLA-DRB5, MS4A6A, LGALS2, FCER1G, FTL, DNAH12
## DRC3, CCDC189, SPAG17, TSNAXIP1, CCDC78, C4orf47, DNAH9, SLC40A1, LDLRAD1, CCDC173
## Negative: WFDC2, MSMB, SCGB1A1, TFF3, CXCL17, SLPI, S100P, LCN2, BPIFB1, SCGB3A1
## KRT19, AKR1C1, CYP2F1, AGR2, SERPINB3, MDK, KRT18, MUC5B, S100A6, PRSS23
## C19orf33, LYPD2, CEACAM6, MUC5AC, PIGR, KRT7, PSCA, S100A14, CXCL6, FXYD3
## PC_ 5
## Positive: IGFBP7, C9orf24, C20orf85, VEGFA, NR4A3, HAPLN3, SNTN, C1orf194, RGS1, CD38
## PML, ETV6, CFAP43, CAPSL, PIFO, CREM, SAXO2, TMEM190, AREG, MORN2
## SCIN, IGFBP2, C9orf135, ISG15, FAM229B, PPP1R42, NT5C3A, PITPNA, DUSP2, IDO1
## Negative: RND3, ULK1, GLB1L, LIMS2, MAN2C1, MNT, PTPRM, MARCO, ZNF469, PDK4
## PLD3, ADCK5, PLIN2, SEPTIN10, APOC1, H2BC8, CDH23, GPNMB, AHI1, PTPN13
## PPP2R3B, H3C7, CASP5, SLC3A2, TIMP2, RRP12, DPY19L2, DNAAF1, NR1H3, PITPNM1
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:33:19 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:33:19 Read 375 rows and found 10 numeric columns
## 06:33:19 Using Annoy for neighbor search, n_neighbors = 30
## 06:33:19 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:33:19 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b13469270d
## 06:33:19 Searching Annoy index using 1 thread, search_k = 3000
## 06:33:19 Annoy recall = 100%
## 06:33:19 Commencing smooth kNN distance calibration using 1 thread
## 06:33:20 Initializing from normalized Laplacian + noise
## 06:33:20 Commencing optimization for 500 epochs, with 13450 positive edges
## 06:33:20 Optimization finished
saveRDS(pbmc, file = "mild144.rds")
a<-"severe_143_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s1", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 18344 features across 20286 samples within 1 assay
## Active assay: RNA (18344 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CD2, IL32, CD3E, PTPRCAP, CD7, GZMA, CD3D, CCL5, CD247, TRBC2
## LCK, OCIAD2, CD96, PRF1, LIMD2, LDHB, SPOCK2, GZMB, SEPTIN1, SH2D2A
## IL2RB, LTB, SYNE2, ETS1, CLEC2D, RPSA, CD3G, ZAP70, MZT2A, RPS4X
## Negative: CTSL, CCL2, CST3, CTSB, CTSD, IL1RN, APOBEC3A, S100A9, S100A8, PLAUR
## LGALS3, MAFB, SGK1, LYZ, FCN1, TIMP1, CCL3, CXCL10, CSTB, HLA-DRA
## IGSF6, S100A6, DEFB1, LILRB4, CCL8, IDO1, SLC11A1, CALHM6, ATF5, CD163
## PC_ 2
## Positive: HCAR3, FCGR3B, HCAR2, PLAU, TNFAIP6, G0S2, CCL3L1, CXCL8, CCL4L2, PI3
## HES4, CXCR4, VEGFA, CARD17, RSAD2, CLEC4E, RGL4, PPIF, SLPI, S100P
## FFAR2, OSM, IRAK2, CMTM2, S100A8, NFKBIA, KATNBL1, DDIT3, SELL, GADD45B
## Negative: CD74, RPS27, HLA-DPA1, CTSB, HLA-DRB1, CST3, HLA-DPB1, RPS18, CTSL, PPDPF
## RPS15A, HLA-DRA, RPS6, CALR, RPS3, CTSZ, HLA-DRB5, RPS5, FABP5, HSPA8
## RPS4X, CTSC, HSP90AB1, RPLP0, RPL3, GSTP1, ACP5, LILRB4, HSPB1, CHCHD10
## PC_ 3
## Positive: ELF3, KRT8, KRT18, KRT19, CLDN4, WFDC2, CAPS, C9orf24, C20orf85, FAM183A
## SMIM22, TACSTD2, TMEM190, CLDN3, MUC4, TPPP3, FXYD3, CCDC78, CXCL17, CLDN7
## C1orf194, RSPH1, C9orf116, ZMYND10, TSPAN1, DNALI1, NUPR1, DYNLRB2, CD24, LCN2
## Negative: S100A4, CTSC, TNFSF10, APOBEC3A, CORO1A, CTSL, CD52, CCL2, FCN1, CTSB
## PTPRE, LYZ, EVL, GIMAP7, CXCL10, CCL3, MAFB, DUSP6, CD2, ATF5
## IL1RN, GZMA, NCF1, RIN2, NKG7, LILRA5, DEFB1, CD7, IL4I1, CCL8
## PC_ 4
## Positive: TNFSF10, DUSP6, APOBEC3A, FCN1, LILRA5, WARS1, IFIT1, ACTG1, RIN2, PTPRE
## IL1RN, VCAN, NCF1, MAFB, S100A12, ATF5, CLU, SERPINB1, CXCL11, S100A4
## S100A9, SERPINB9, IFIT2, S100A8, CXCL10, PLAC8, LYZ, OASL, ACOD1, CD300E
## Negative: APOE, SERPINH1, HSPB1, HSPA6, C1QC, C1QA, C1QB, DNAJB1, APOC1, BAG3
## FKBP4, CCL18, HSPH1, DNAJA4, HSPA1B, HSPD1, GPNMB, HSPA1A, IER5, HSPE1
## ZFAND2A, CACYBP, G0S2, TCEAL9, CRYAB, PLAU, LGMN, GADD45B, SCD, NRP2
## PC_ 5
## Positive: DNAJB1, HSPA1A, HSPA1B, HSPA6, HSPH1, BAG3, HSP90AA1, DNAJA4, HSPD1, HSPB1
## FKBP4, ZFAND2A, CACYBP, HSP90AB1, SERPINH1, HSPE1, HSPA8, IER5, OLIG1, S100A12
## FCN1, S100A9, UBB, CRYAB, NR4A1, S100A8, ENGASE, DNAJB4, TENT5A, MRPL18
## Negative: C1QC, C1QB, C1QA, APOE, APOC1, GPNMB, NR1H3, LGMN, HLA-DQA1, CCL18
## PLD3, FPR3, A2M, MSR1, TREM2, SCD, KCNMA1, NUPR1, GCHFR, MMP14
## PLTP, VSIG4, TSPAN4, MGLL, CD276, FN1, PLA2G7, GM2A, NRP2, VAT1
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:33:56 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:33:56 Read 12840 rows and found 10 numeric columns
## 06:33:56 Using Annoy for neighbor search, n_neighbors = 30
## 06:33:56 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:33:58 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b12d3f757b
## 06:33:58 Searching Annoy index using 1 thread, search_k = 3000
## 06:34:02 Annoy recall = 100%
## 06:34:02 Commencing smooth kNN distance calibration using 1 thread
## 06:34:03 Initializing from normalized Laplacian + noise
## 06:34:03 Commencing optimization for 200 epochs, with 506242 positive edges
## 06:34:08 Optimization finished
saveRDS(pbmc, file = "severe143.rds")
a<-"severe_145_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s2", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 17524 features across 17395 samples within 1 assay
## Active assay: RNA (17524 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CTSL, FTL, CTSB, IFI30, PSAP, CTSD, CST3, CCL2, HLA-DRA, CD68
## CSTB, GLUL, HLA-DRB1, CD63, MAFB, GRN, CD163, LGALS3, TIMP1, MS4A4A
## CD74, LGMN, PLAUR, SERPINA1, HLA-DRB5, CCL8, SAT1, HLA-DPA1, HMOX1, DEFB1
## Negative: CCL5, PTPRCAP, IL32, CD7, CD3E, CD2, GZMA, GZMB, PRF1, CD3D
## NKG7, GNLY, CST7, TRBC2, SYNE2, CD247, CD8B, CD8A, LINC01871, GZMH
## C12orf75, GZMK, OCIAD2, CLIC3, ETS1, LCK, GNG2, TRAC, CD3G, CD96
## PC_ 2
## Positive: CCL5, PTPRCAP, CD7, CD2, GZMA, CD3E, IL32, GZMB, PRF1, NKG7
## CD3D, GNLY, CST7, CD247, TRBC2, GZMH, LINC01871, CD8B, CD8A, GZMK
## GNG2, ETS1, KLRD1, CD3G, LCK, CD96, TRAC, CTSW, IL2RB, SH2D2A
## Negative: KRT8, CD24, CYP4B1, TSPAN1, KRT19, FXYD3, SMIM22, FAM183A, C9orf24, RSPH1
## C20orf85, WFDC2, GSTA1, C1orf194, KRT18, TACSTD2, CAPS, MUC4, DYNLRB2, C19orf33
## CHST9, AGR3, PIFO, PIGR, LRRIQ1, ALDH1A1, SLPI, MS4A8, ELF3, PERP
## PC_ 3
## Positive: CYBA, LGALS1, CD74, HLA-DRB5, RPS18, RPL18A, C1QA, RPLP0, ALDOA, HCST
## CD68, CALR, RPS5, RPSA, HLA-DRB1, C1QC, MTRNR2L12, CAPG, RPS4X, MT-ND4L
## ATP6V0C, HLA-DPB1, SYNGR2, PTMS, HLA-DQA1, PRDX1, RPL10A, HLA-DMA, C1QB, CTSC
## Negative: S100A8, IFIT2, CCL2, IFIT1, RSAD2, CXCL10, TNFAIP6, SLPI, S100A12, IFIT3
## CEACAM1, CCL3L1, CXCL8, WFDC2, SCGB3A1, HCAR3, MSMB, BPIFA1, STEAP4, CH25H
## PROK2, SAT1, RSPH1, KCNJ15, SCGB1A1, TNFSF10, AGR2, SERPINB3, GSTA1, LYZ
## PC_ 4
## Positive: IL1RN, CXCL10, TNFSF10, ISG15, CCL3L1, FCN1, IFIT1, APOBEC3A, IFIT2, LYZ
## PLAC8, S100A8, IFIT3, ISG20, CXCL11, IDO1, RSAD2, HPSE, S100A4, SERPINB9
## HES4, ZFP36, DEFB1, IL27, IFITM1, CCL3, OLR1, MT2A, ATF5, JUNB
## Negative: APOE, CCL18, LGMN, APOC1, GPNMB, C1QC, C1QB, CTSD, RNASE1, CTSL
## FABP5, FTL, SMPDL3A, C1QA, NR1H3, NRP2, ZNF638, PLIN2, CSTB, TSPAN4
## PLA2G7, CTSZ, LMNA, PLD3, CYB5A, PLTP, PLPP3, CD163, VAT1, ACP2
## PC_ 5
## Positive: C9orf24, RSPH1, C20orf85, FAM183A, C1orf194, DYNLRB2, PIFO, LRRIQ1, C9orf116, CAPS
## SNTN, MORN2, DNAH12, C9orf135, C5orf49, CFAP300, FAM229B, CFAP53, MORN5, ENKUR
## KIF9, C11orf88, LDLRAD1, CCDC146, TPPP3, DNAH10, EFCAB1, PPIL6, CCDC170, CAPSL
## Negative: LYPD2, CEACAM6, CXCL17, LCN2, KRT19, TACSTD2, KRT4, GPRC5A, KRT7, CEACAM5
## F3, S100P, MUC1, AQP5, LMO7, GABRP, KLF5, SLC6A14, SERPINB3, S100A14
## STEAP4, C19orf33, MSMB, EPS8L1, MALL, RND3, KRT13, ELF3, CXCL6, MSLN
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:36:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:36:01 Read 13807 rows and found 10 numeric columns
## 06:36:01 Using Annoy for neighbor search, n_neighbors = 30
## 06:36:01 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:36:02 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b16192a22b
## 06:36:02 Searching Annoy index using 1 thread, search_k = 3000
## 06:36:06 Annoy recall = 100%
## 06:36:07 Commencing smooth kNN distance calibration using 1 thread
## 06:36:08 Initializing from normalized Laplacian + noise
## 06:36:08 Commencing optimization for 200 epochs, with 549062 positive edges
## 06:36:13 Optimization finished
saveRDS(pbmc, file = "severe145.rds")
a<-"severe_146_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s3", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 16544 features across 3398 samples within 1 assay
## Active assay: RNA (16544 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: ELF3, CLDN4, CAPS, TNFRSF12A, KRT19, CFAP157, GSTP1, SNHG12, CLDN3, SOX4
## GPX4, UBXN11, LRRC23, TUBB4B, FOXJ1, NUPR1, KRT18, PPDPF, H4C3, RRAD
## RPS18, PRDX5, RPLP0, PLXNB2, KRT8, GDF15, SNHG8, ZMYND10, ATP5IF1, DRC3
## Negative: CCL3L1, FTL, APOBEC3A, HCAR3, CXCL8, G0S2, PI3, PLAUR, CCL4L2, S100A12
## IL1B, TXNIP, CCL3, PROK2, C15orf48, OSM, S100A4, ZFP36, ATP2B1-AS1, CCL4
## CD177, ANXA2R, HMOX1, CD22, CD14, NPC2, KLF2, DUSP6, TMSB4X, SLAMF7
## PC_ 2
## Positive: LGALS1, HLA-DRB1, HLA-DRA, HLA-DPA1, HLA-DRB5, CTSB, VIM, CTSL, CD74, HLA-DPB1
## PPIA, CAPG, CST3, RPS24, LILRB4, HAVCR2, CD81, EEF1A1, ANXA5, LAIR1
## ATP1B3, RPL10, CTSC, PTMA, CD68, ACP5, HCST, ADA2, CTSZ, PRDX1
## Negative: CFAP157, ELF3, CAPS, CLDN4, SLPI, CLDN3, DRC3, ZMYND10, FOXJ1, KRT19
## FAM183A, RSPH1, LRRIQ1, CCDC40, MUC4, C9orf24, LRRC46, RRAD, CCDC114, SOX4
## C20orf85, DNAH11, SPAG17, HYDIN, VWA3A, TNFRSF12A, C1orf194, LRRC23, WDR60, KRT8
## PC_ 3
## Positive: PTPRCAP, CD2, CD3E, CD3D, PRF1, TRBC2, CD247, LCK, TRAC, CD96
## CCL5, GZMA, CD3G, CD7, GZMB, GIMAP7, CD8A, LAG3, ZAP70, SPOCK2
## NKG7, FYN, CHST12, GZMM, GPRIN3, IKZF3, CTSW, EVL, IL2RB, CD6
## Negative: CTSL, FTL, CST3, CTSB, LILRB4, CD68, CTSZ, HLA-DRA, CD163, LGMN
## PLA2G7, DAB2, PLD3, SDS, FNIP2, KYNU, IL4I1, C15orf48, GPNMB, MMP19
## CREG1, MSR1, LAIR1, ATOX1, MAFB, C1QA, RGL1, LHFPL2, GRN, CPM
## PC_ 4
## Positive: PIK3R3, PLCG2, DNAH11, HEXIM1, AHI1, PIEZO1, WDR6, MUC16, ID2, SPG11
## MACC1, KCNQ1OT1, EFHC1, HSPA2, H3C1, H4C2, CDKN2B, TAF4B, CDRT1, PLK2
## SPAG17, ATP2C2, H2AC17, BEST4, MAFB, H4-16, MUC4, DLEC1, FBXW10, DOC2A
## Negative: C20orf85, PRDX5, C9orf116, IGFBP2, TUBA4B, C1orf194, C9orf24, CAPS, GSTP1, C5orf49
## NUPR1, FAM183A, ATP5IF1, RNF187, KRT8, KRT18, IGFBP7, CATSPERD, MORN2, CLDN3
## DMKN, DNALI1, WDR38, ROPN1L, RRAD, PIFO, TPPP3, PPIL6, DYNLRB2, PLPP2
## PC_ 5
## Positive: DNAH3, CFAP47, DLEC1, EFHC1, DNAH12, TTLL10, VWA3A, DNAH6, RP1, LINC02832
## HYDIN, DTHD1, CDHR3, LRRC74B, CFAP65, CCDC17, DNAH9, DNAI1, CFAP100, LRRIQ1
## DOC2A, CDHR4, CFAP54, CCDC40, SPAG17, CFAP46, SAXO2, CFAP43, CCDC30, CROCC2
## Negative: IGFBP3, LMO7, LAMB3, MACC1, KRT7, LRRC8A, EPS8L1, TSHZ2, TACSTD2, PRSS23
## MAL2, ADIRF, ASS1, TM4SF1, ZNF431, GALNT5, SCGB1A1, S100A2, FGFR1, SPINK5
## MISP, H2AC11, SOX4, DST, RND3, LMTK3, MUC5AC, CDKN2B, EPS8L2, FSTL3
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:37:48 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:37:48 Read 2256 rows and found 10 numeric columns
## 06:37:48 Using Annoy for neighbor search, n_neighbors = 30
## 06:37:48 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:37:49 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b13298d258
## 06:37:49 Searching Annoy index using 1 thread, search_k = 3000
## 06:37:49 Annoy recall = 100%
## 06:37:50 Commencing smooth kNN distance calibration using 1 thread
## 06:37:50 Initializing from normalized Laplacian + noise
## 06:37:50 Commencing optimization for 500 epochs, with 92048 positive edges
## 06:37:53 Optimization finished
saveRDS(pbmc, file = "severe146.rds")
a<-"severe_148_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s4", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 17460 features across 2398 samples within 1 assay
## Active assay: RNA (17460 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CD3E, IL32, CD7, CD3D, PTPRCAP, CD2, ETS1, TRBC2, CD247, CLEC2D
## OCIAD2, LCK, SH2D2A, RORA, SEPTIN1, LDHB, CCL5, IL7R, SPOCK2, SYNE2
## ZAP70, RPL5, RPSA, PRF1, CD6, CD96, PDCD4, TRAC, NKG7, GZMM
## Negative: FTL, IFI30, LYZ, CCL2, IL1RN, CTSL, APOBEC3A, CTSB, CD68, S100A9
## MAFB, TIMP1, FCN1, S100A8, CD300E, SGK1, CTSD, C15orf48, GLUL, PLAUR
## EMP1, HLA-DRA, CD14, CCL8, CD63, CYBB, MS4A6A, CD163, TGFBI, DUSP6
## PC_ 2
## Positive: FCGR3B, IFIT2, IFIT3, S100A8, TNFAIP6, G0S2, IFIT1, CMTM2, S100A9, CXCR2
## PDE4B, SELL, HCAR3, TNFSF10, PROK2, RSAD2, ALOX5AP, ADGRG3, HCAR2, FFAR2
## ZBP1, MNDA, PI3, GBP5, PLEK, CXCL8, ALPL, PHACTR1, IL1RN, IL1B
## Negative: LGALS3, S100A10, EEF1A1, RPS8, GSTP1, CSTB, RPL13, PRDX1, RPS12, LMNA
## PDLIM1, CTSB, CD74, VIM, CD9, CALR, CLDN7, ANXA1, CD63, RPS27
## PPDPF, TUBB4B, UBC, RPLP0, HSP90AA1, RPS18, RHOB, RPS5, RPS15A, SSR4
## PC_ 3
## Positive: ELF3, KRT8, CLDN7, AKR1C2, PDLIM1, KIAA1522, SMIM22, KRT19, MUC4, CLDN4
## CAPS, SLPI, CD24, RSPH1, GPX2, KRT7, SOX4, CLDN3, MAL2, C19orf33
## PPL, PPIL6, TSPAN1, KRT18, KLF5, TACSTD2, WFDC2, FXYD3, GSTA1, CXCL17
## Negative: VIM, EEF1A1, RPS27, RPS12, RPS3, RPL23A, RPS29, CALR, PABPC1, RPS4X
## RPS27A, RPS15A, RPL13, RPS6, RPL3, RPS8, CD3E, CD7, EMP3, RPSA
## HSPA8, RPS5, IL32, RPL5, CD3D, PTPRCAP, RPS18, RPS3A, RGS1, CD74
## PC_ 4
## Positive: APOE, C1QC, APOC1, C1QA, C1QB, GPNMB, CCL18, LGMN, NR1H3, TREM2
## FABP5, CD276, HLA-DQA1, VAT1, SCD, GCHFR, ACP2, CD9, PLTP, A2M
## PMP22, CREG1, FTL, CD81, MRAS, HLA-DPB1, APOC2, TUBB, HLA-DPA1, SPP1
## Negative: S100A12, FCN1, VCAN, APOBEC3A, ZFP36, CKAP4, CD300E, HPSE, S100A9, S100A8
## LYZ, TNFSF10, FOS, LILRA5, WARS1, RIN2, NEXN, MAFB, RSAD2, NCF1
## DUSP6, CFP, EMP1, GBP1, IFIT2, GCH1, FOSB, IL1RN, NLRP3, CCL7
## PC_ 5
## Positive: ADIRF, KLK11, DSG2, MET, EFNA1, C19orf33, S100A2, ASS1, SULT2B1, MSLN
## FSTL1, CEACAM5, VSIG2, LY6D, S100A14, MYO5B, HSPB8, CAV2, TM4SF1, KRTCAP3
## FGFBP1, MAL2, S100A16, AGR2, EPS8L1, BCAR1, FST, ITPKC, GPRC5A, KRT15
## Negative: PPIL6, CFAP157, PACRG, GDF15, CFAP53, H2BC18, GULP1, CCDC78, RSPH1, PIFO
## MAP1B, GIHCG, SPEF1, IQCK, CSKMT, C1orf194, CFAP45, MOV10L1, SH3BGR, C9orf24
## CAPS, SLC25A4, AARS1, COL1A1, ALDH3A1, DNAH11, H2BC9, SOX2, PABPC1L, GTF2IRD1
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:38:15 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:38:15 Read 697 rows and found 10 numeric columns
## 06:38:15 Using Annoy for neighbor search, n_neighbors = 30
## 06:38:15 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:38:15 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b16594c19b
## 06:38:15 Searching Annoy index using 1 thread, search_k = 3000
## 06:38:15 Annoy recall = 100%
## 06:38:15 Commencing smooth kNN distance calibration using 1 thread
## 06:38:16 Initializing from normalized Laplacian + noise
## 06:38:16 Commencing optimization for 500 epochs, with 26150 positive edges
## 06:38:18 Optimization finished
saveRDS(pbmc, file = "severe148.rds")
a<-"severe_149_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s5", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 16724 features across 2483 samples within 1 assay
## Active assay: RNA (16724 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CST3, CTSL, IFI30, FTL, TYROBP, CTSD, FCER1G, CD68, CTSB, CCL2
## LGALS1, CD63, MS4A4A, CD163, CCL8, PSAP, FTH1, GLUL, MAFB, TIMP1
## GRN, NPC2, TYMP, AIF1, LGALS3, CREG1, FCGRT, HLA-DRA, MS4A6A, MARCKS
## Negative: IL32, CD3E, PTPRCAP, EVL, SPOCK2, IL7R, CD2, CD3D, LIMD2, CD247
## GPR171, RAC2, OCIAD2, LCK, BTG1, LDHB, ETS1, ZFP36L2, CCL5, CD7
## SYNE2, CD69, LTB, TRBC2, GIMAP7, PRF1, CLEC2D, ODF2L, PARP8, PIM2
## PC_ 2
## Positive: SRGN, CD69, CD3E, RAC2, PTPRCAP, IL7R, ALOX5AP, CD52, LIMD2, IL32
## CD2, GIMAP7, EVL, SPOCK2, CCL5, CD3D, CCL4, LSP1, FTL, BTG1
## EMP3, CD247, LCK, GPR171, TYROBP, PRF1, CYTIP, LYAR, NKG7, TRBC2
## Negative: TACSTD2, ELF3, CLDN7, KRT7, MUC4, C19orf33, CXCL17, WFDC2, GPRC5A, CEACAM6
## CLDN4, MAL2, LMO7, KRT19, ALDH1A3, ADGRF1, FXYD3, S100A14, PLAAT2, SLC6A14
## SDCBP2, F3, C1orf116, MUC1, MUC16, ASS1, TM4SF1, CD24, SDC1, AGR2
## PC_ 3
## Positive: FCGR3B, CSF3R, S100A8, SLC25A37, FPR1, CLEC4E, S100A9, BCL2A1, SELL, MNDA
## IFIT2, IL1RN, TNFAIP6, HES4, NCF1, HCAR3, HCAR2, CEACAM1, ISG15, SAT1
## TNFSF10, CLEC4A, P2RY13, FTH1, MT2A, CMTM2, SOD2, IGSF6, PHACTR1, TLR2
## Negative: MTRNR2L12, JUN, CCL5, NKG7, CALR, ARL4C, PRF1, CD3E, LAG3, IL32
## S100A10, ANXA1, CD2, ZFP36L2, CALM1, CTSC, CD3D, BTG1, CD7, PTPRCAP
## GZMB, GZMA, CD52, CD8A, EVL, PLAAT4, LCK, CLIC3, HLA-DPA1, CD8B
## PC_ 4
## Positive: SLC6A14, MALL, S100A14, CEACAM6, SPINT1, CEACAM5, SCEL, EHF, PRSS8, KRT15
## SULT2B1, GPRC5A, SDC1, PLS3, S100A16, ASS1, PODXL, MPZL2, NEAT1, MUC21
## KLK10, MYH14, VSIG2, GRHL3, SDCBP2, SPNS2, PPL, LRRC8A, LRATD1, AQP5
## Negative: MORN2, C1orf194, SNTN, DNALI1, CAPS, CETN2, FAM81B, TMEM190, C20orf85, RSPH1
## PIFO, C9orf24, CFAP126, PPIL6, CLDN3, DPCD, TSPAN19, TUBA4B, TPPP3, C9orf116
## PFN2, IFT22, FAM183A, C5orf49, EFCAB1, SAA1, WDR38, ANKRD44-AS1, C9orf135, AKAP14
## PC_ 5
## Positive: APOE, SLC16A10, AREG, TSC22D1, VEGFA, H2AC6, G0S2, SDC2, APOC1, CREM
## ATP13A3, CD9, GPR183, ZNF331, SDS, ELL2, SOD2, TMEM38B, RGS1, PMP22
## HLA-DQA1, CHMP1B, CD83, AVPI1, RAB7B, BTG1, SQSTM1, TREM2, IL1R1, CKS2
## Negative: ACTB, FCN1, C1orf162, LILRA5, MS4A6A, ACTG1, SSB, S100A8, CXCL10, S100A4
## S100A9, BLVRA, LYZ, NKG7, TYMP, CBR1, PLBD1, CMKLR1, APOBEC3A, PRF1
## CCL5, PLAAT4, TNFSF10, GZMA, CD8A, MPEG1, HSPB1, ASGR1, GZMB, CD38
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:38:31 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:38:31 Read 1702 rows and found 10 numeric columns
## 06:38:31 Using Annoy for neighbor search, n_neighbors = 30
## 06:38:31 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:38:32 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b11b818c2c
## 06:38:32 Searching Annoy index using 1 thread, search_k = 3000
## 06:38:32 Annoy recall = 100%
## 06:38:33 Commencing smooth kNN distance calibration using 1 thread
## 06:38:34 Initializing from normalized Laplacian + noise
## 06:38:34 Commencing optimization for 500 epochs, with 64604 positive edges
## 06:38:36 Optimization finished
saveRDS(pbmc, file = "severe149.rds")
a<-"severe_152_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s6", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat
## 17352 features across 6960 samples within 1 assay
## Active assay: RNA (17352 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1
## Positive: CST3, C15orf48, MAFB, TIMP1, GLUL, IFI30, BCL2A1, S100A9, CD163, SOD2
## PLAUR, EMP1, S100A8, LYZ, SERPINA1, FCGR2A, LGALS3, CCL7, PLEK, CTSZ
## HLA-DRA, IGSF6, CYP1B1, MARCKS, FCGR1A, PLIN2, MMP19, LAIR1, C5AR1, FNIP2
## Negative: IL32, CD27, CCL5, GZMA, CD3E, PTPRCAP, GZMB, LIMD2, CD3D, CD7
## GNLY, CD2, CD8A, PRF1, TRBC2, CD8B, NKG7, LDHB, HMGB1, SEPTIN1
## LINC01871, LCK, C12orf75, CLEC2D, EVL, TRAC, LIME1, CD3G, ETS1, GZMH
## PC_ 2
## Positive: NKG7, CCL5, GZMA, GZMB, CD3E, CD7, CD3D, CD8A, PRF1, CD2
## CD8B, GNLY, LINC01871, PLAAT4, TRBC2, IL32, CST7, EVL, LCK, CD3G
## GZMH, LAG3, CLIC3, GZMK, CBLB, ARL4C, CD96, LBH, TRAC, LIMD2
## Negative: JCHAIN, MZB1, DERL3, HSP90B1, IGHG1, IGHG4, IGLC3, IGHV4-34, SEC11C, IGLV3-19
## POU2AF1, FKBP11, IGHA1, TXNDC5, MYDGF, CPNE5, PRDX4, CD79A, IGLL5, TNFRSF17
## LMAN1, SSR4, SDF2L1, HSPA5, ELL2, SDC1, TENT5C, CCR10, SYVN1, ITM2C
## PC_ 3
## Positive: CD74, PPIB, HSP90B1, MANF, MYDGF, IGLC3, MZB1, IGHG4, HSPA5, PDIA4
## FKBP11, TMED9, JCHAIN, SEC11C, IGHG1, DERL3, S100A8, LMAN1, RPN2, POU2AF1
## IFI30, CCL7, ERLEC1, CD38, HSPD1, SRM, DERL1, BCL2A1, SMC4, CD163
## Negative: KRT19, KRT7, KLF5, C19orf33, KRT8, ELF3, SFN, TSPAN1, KRT17, CD24
## CAPS, RND3, GRAMD2B, ZMYND10, FAM183A, KRT18, CFAP77, SMIM22, RSPH4A, LRRIQ1
## CCDC190, SAXO2, CLDN3, AGR2, C9orf135, C9orf116, FXYD3, DNALI1, ADH7, C1orf194
## PC_ 4
## Positive: SIT1, LIME1, IKZF3, MIAT, ARHGAP9, CPNE7, LTB, LINC01871, CD27, CST7
## PDE4D, SPOCK2, PARP8, LBH, APOBEC3G, PBXIP1, GSTM2, TIGIT, RGL4, PYHIN1
## CCND2, FKBP11, PIM2, MAGED1, E2F5, CXCR6, TC2N, SNAI3, SDC1, LINC00861
## Negative: TOP2A, CENPF, ASPM, KIFC1, CDC20, DLGAP5, UBE2C, MKI67, MXD3, TPX2
## CCNB1, GTSE1, CDCA8, HJURP, CCNA2, KIF23, PLK1, NUSAP1, CDKN3, NUF2
## BIRC5, SGO1, CDCA2, DEPDC1B, CENPE, H2AX, SGO2, CDK1, KIF2C, KIF22
## PC_ 5
## Positive: SERPINB2, CCL7, CXCL3, S100A8, TMEM158, S100A12, CD93, MMP19, S100A9, CXCL2
## IL1B, DUSP6, CXCL8, PID1, CCL20, HPSE, EREG, ASPH, SPP1, CRADD
## ANPEP, THBD, FFAR2, CXCL5, CCL3L1, TCTEX1D1, RGCC, CXCL1, UBE2C, BCL2A1
## Negative: C1QA, C1QC, C1QB, HLA-DQA1, CD74, HLA-DQB1, HLA-DPB1, SDS, VMO1, SYNGR2
## CD86, LGMN, ACP5, HLA-DPA1, SLAMF7, AXL, GPR183, NCF1, NR1H3, APOE
## FPR3, HLA-DQA2, MS4A4A, HLA-DMB, TMEM176B, TREM2, TMEM176A, HLA-DRB5, CLEC10A, SPINT2
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:39:03 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:39:03 Read 662 rows and found 10 numeric columns
## 06:39:03 Using Annoy for neighbor search, n_neighbors = 30
## 06:39:03 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:39:03 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b15b12321d
## 06:39:03 Searching Annoy index using 1 thread, search_k = 3000
## 06:39:03 Annoy recall = 100%
## 06:39:04 Commencing smooth kNN distance calibration using 1 thread
## 06:39:05 Initializing from normalized Laplacian + noise
## 06:39:05 Commencing optimization for 500 epochs, with 23720 positive edges
## 06:39:06 Optimization finished
saveRDS(pbmc, file = "severe152.rds")
#input readRDS
hc51<-readRDS(file="hc51.rds")
hc52<-readRDS(file="hc52.rds")
hc100<-readRDS(file="hc100.rds")
mild141<-readRDS(file="mild141.rds")
mild142<-readRDS(file="mild142.rds")
mild144<-readRDS(file="mild144.rds")
severe143<-readRDS(file="severe143.rds")
severe145<-readRDS(file="severe145.rds")
severe146<-readRDS(file="severe146.rds")
severe148<-readRDS(file="severe148.rds")
severe149<-readRDS(file="severe149.rds")
severe152<-readRDS(file="severe152.rds")
#setup tech and celltype
hc51<-RenameCells(hc51,add.cell.id="hc51",for.merge=T)
hc51@meta.data$tech<-"healthy_control"
hc51@meta.data$celltype<-"healthy_control_51"
hc52<-RenameCells(hc52,add.cell.id="hc52",for.merge=T)
hc52@meta.data$tech<-"healthy_control"
hc52@meta.data$celltype<-"healthy_control_52"
hc100<-RenameCells(hc100,add.cell.id="hc100",for.merge=T)
hc100@meta.data$tech<-"healthy_control"
hc100@meta.data$celltype<-"healthy_control_100"
mild141<-RenameCells(mild141,add.cell.id="mild141",for.merge=T)
mild141@meta.data$tech<-"mild"
mild141@meta.data$celltype<-"mild_141"
mild142<-RenameCells(mild142,add.cell.id="mild142",for.merge=T)
mild142@meta.data$tech<-"mild"
mild142@meta.data$celltype<-"mild_142"
mild144<-RenameCells(mild144,add.cell.id="mild144",for.merge=T)
mild144@meta.data$tech<-"mild"
mild144@meta.data$celltype<-"mild_144"
severe143<-RenameCells(severe143,add.cell.id="severe143",for.merge=T)
severe143@meta.data$tech<-"severe"
severe143@meta.data$celltype<-"severe_143"
severe145<-RenameCells(severe145,add.cell.id="severe145",for.merge=T)
severe145@meta.data$tech<-"severe"
severe145@meta.data$celltype<-"severe_145"
severe146<-RenameCells(severe146,add.cell.id="severe146",for.merge=T)
severe146@meta.data$tech<-"severe"
severe146@meta.data$celltype<-"severe_146"
severe148<-RenameCells(severe148,add.cell.id="severe148",for.merge=T)
severe148@meta.data$tech<-"severe"
severe148@meta.data$celltype<-"severe_148"
severe149<-RenameCells(severe149,add.cell.id="severe149",for.merge=T)
severe149@meta.data$tech<-"severe"
severe149@meta.data$celltype<-"severe_149"
severe152<-RenameCells(severe152,add.cell.id="severe152",for.merge=T)
severe152@meta.data$tech<-"severe"
severe152@meta.data$celltype<-"severe_152"
#merge
h1_2<-merge(hc51,hc52)
h123<-merge(h1_2,hc100)
m1_2<-merge(mild141,mild142)
m123<-merge(m1_2,mild144)
s1_2<-merge(severe143,severe145)
s123<-merge(s1_2,severe146)
s4_5<-merge(severe148,severe149)
s456<-merge(s4_5,severe152)
s123456<-merge(s123,s456)
hm<-merge(h123,m123)
hms<-merge(hm,s123456)
saveRDS(hms, file="hms_before_integrate.rds")
#before integrate
hms[["percent.mt"]] <- PercentageFeatureSet(hms, pattern = "^Mt-")
VlnPlot(hms, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3,pt.size = 0)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.

pancreas <- NormalizeData(object = hms, normalization.method = "LogNormalize", scale.factor = 1e4)
pancreas <- FindVariableFeatures(pancreas, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
pancreas <- ScaleData(pancreas, verbose = FALSE)
pancreas <- RunPCA(pancreas, npcs = 30, verbose = FALSE)
pancreas <- RunUMAP(pancreas, reduction = "pca", dims = 1:30)
## 06:43:32 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:43:32 Read 52158 rows and found 30 numeric columns
## 06:43:32 Using Annoy for neighbor search, n_neighbors = 30
## 06:43:32 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:43:40 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b15d5d3735
## 06:43:40 Searching Annoy index using 1 thread, search_k = 3000
## 06:43:57 Annoy recall = 100%
## 06:43:57 Commencing smooth kNN distance calibration using 1 thread
## 06:43:59 Initializing from normalized Laplacian + noise
## 06:44:02 Commencing optimization for 200 epochs, with 2357338 positive edges
## 06:44:22 Optimization finished
p1 <- DimPlot(pancreas, reduction = "umap", group.by = "tech")
p2 <- DimPlot(pancreas, reduction = "umap", group.by = "celltype", label = TRUE, repel = TRUE) +
NoLegend()
plot_grid(p1,p2)

#integrate
pancreas.list <- SplitObject(pancreas, split.by = "celltype")
for (i in 1: length(pancreas.list)) {
pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE)
pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", nfeatures = 2000,
verbose = FALSE)
}
reference.list <- pancreas.list[c("healthy_control_51","healthy_control_52","healthy_control_100","mild_141",
"mild_142","mild_144", "severe_143", "severe_145", "severe_146", "severe_148",
"severe_149", "severe_152")]
pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
## Computing 2000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 22355 anchors
## Filtering anchors
## Retained 4674 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1464 anchors
## Filtering anchors
## Retained 1171 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1406 anchors
## Filtering anchors
## Retained 1078 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 5743 anchors
## Filtering anchors
## Retained 1976 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6214 anchors
## Filtering anchors
## Retained 1132 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1330 anchors
## Filtering anchors
## Retained 844 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6653 anchors
## Filtering anchors
## Retained 2100 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 7667 anchors
## Filtering anchors
## Retained 1464 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1403 anchors
## Filtering anchors
## Retained 843 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 4174 anchors
## Filtering anchors
## Retained 3295 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1749 anchors
## Filtering anchors
## Retained 1412 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1683 anchors
## Filtering anchors
## Retained 1159 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1089 anchors
## Filtering anchors
## Retained 925 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1436 anchors
## Filtering anchors
## Retained 1396 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1518 anchors
## Filtering anchors
## Retained 1478 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 26470 anchors
## Filtering anchors
## Retained 1322 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 25133 anchors
## Filtering anchors
## Retained 699 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1499 anchors
## Filtering anchors
## Retained 329 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 5535 anchors
## Filtering anchors
## Retained 1391 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6575 anchors
## Filtering anchors
## Retained 1665 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1655 anchors
## Filtering anchors
## Retained 496 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 29135 anchors
## Filtering anchors
## Retained 1596 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 28477 anchors
## Filtering anchors
## Retained 1149 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1488 anchors
## Filtering anchors
## Retained 343 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 5554 anchors
## Filtering anchors
## Retained 1312 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6527 anchors
## Filtering anchors
## Retained 1667 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1634 anchors
## Filtering anchors
## Retained 520 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 31795 anchors
## Filtering anchors
## Retained 7563 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 9221 anchors
## Filtering anchors
## Retained 1203 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 9137 anchors
## Filtering anchors
## Retained 1052 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1381 anchors
## Filtering anchors
## Retained 526 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 4569 anchors
## Filtering anchors
## Retained 1404 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 5203 anchors
## Filtering anchors
## Retained 1457 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1551 anchors
## Filtering anchors
## Retained 700 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 9057 anchors
## Filtering anchors
## Retained 4508 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 9376 anchors
## Filtering anchors
## Retained 2695 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3196 anchors
## Filtering anchors
## Retained 1671 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3087 anchors
## Filtering anchors
## Retained 1340 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1367 anchors
## Filtering anchors
## Retained 889 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2410 anchors
## Filtering anchors
## Retained 1752 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2549 anchors
## Filtering anchors
## Retained 1798 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1316 anchors
## Filtering anchors
## Retained 953 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3314 anchors
## Filtering anchors
## Retained 2834 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3282 anchors
## Filtering anchors
## Retained 2242 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2697 anchors
## Filtering anchors
## Retained 2337 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6494 anchors
## Filtering anchors
## Retained 2055 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 7023 anchors
## Filtering anchors
## Retained 1591 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1477 anchors
## Filtering anchors
## Retained 802 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3698 anchors
## Filtering anchors
## Retained 2245 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 4414 anchors
## Filtering anchors
## Retained 2686 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1610 anchors
## Filtering anchors
## Retained 949 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6726 anchors
## Filtering anchors
## Retained 4706 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6381 anchors
## Filtering anchors
## Retained 3811 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 4553 anchors
## Filtering anchors
## Retained 2699 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2760 anchors
## Filtering anchors
## Retained 1993 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2794 anchors
## Filtering anchors
## Retained 1741 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2930 anchors
## Filtering anchors
## Retained 1378 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1358 anchors
## Filtering anchors
## Retained 1001 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2380 anchors
## Filtering anchors
## Retained 1729 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2597 anchors
## Filtering anchors
## Retained 1922 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1509 anchors
## Filtering anchors
## Retained 1105 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2759 anchors
## Filtering anchors
## Retained 2216 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2754 anchors
## Filtering anchors
## Retained 2088 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2527 anchors
## Filtering anchors
## Retained 1782 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 1999 anchors
## Filtering anchors
## Retained 1555 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2664 anchors
## Filtering anchors
## Retained 2153 anchors
pancreas.integrated <- IntegrateData(anchorset = pancreas.anchors, dims = 1:30)
## Merging dataset 10 into 7
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 6 into 5
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 3 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 12 into 11
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 4 into 5 6
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 11 12 into 7 10
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 9 into 7 10 11 12
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 5 6 4 into 1 3
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 2 into 1 3 5 6 4
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 8 into 7 10 11 12 9
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 1 3 5 6 4 2 into 7 10 11 12 9 8
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it
DefaultAssay(pancreas.integrated) <- "integrated"
pancreas.integrated <- ScaleData(pancreas.integrated, verbose = FALSE)
pancreas.integrated <- RunPCA(pancreas.integrated, npcs = 30, verbose = FALSE)
pancreas.integrated <- RunUMAP(pancreas.integrated, reduction = "pca", dims = 1:30)
## 07:55:08 UMAP embedding parameters a = 0.9922 b = 1.112
## 07:55:08 Read 52158 rows and found 30 numeric columns
## 07:55:08 Using Annoy for neighbor search, n_neighbors = 30
## 07:55:09 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 07:57:03 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b14c673012
## 07:57:03 Searching Annoy index using 1 thread, search_k = 3000
## 07:57:22 Annoy recall = 100%
## 07:57:22 Commencing smooth kNN distance calibration using 1 thread
## 07:57:24 Initializing from normalized Laplacian + noise
## 07:57:38 Commencing optimization for 200 epochs, with 2462718 positive edges
## 07:57:59 Optimization finished
p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "tech")
p2 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype")
plot_grid(p1,p2)

saveRDS(pancreas.integrated, file = "hms_individual_integrated_OK.rds")
#draw the plot group.by celltype
hms_individual_integrated<-readRDS(file="hms_individual_integrated_OK.rds")
p1 <- DimPlot(hms_individual_integrated, reduction = "umap", group.by = "celltype")
p1

#find how many 15cluster
ElbowPlot(hms_individual_integrated)

hms_neighbor<- FindNeighbors(hms_individual_integrated, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
hms_cluster <- FindClusters( hms_neighbor, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 52158
## Number of edges: 1604563
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8838
## Number of communities: 15
## Elapsed time: 18 seconds
head(Idents(hms_cluster), 5)
## hc51_AAACCTGAGACACTAA-1 hc51_AAACCTGAGGAGTACC-1 hc51_AAACCTGAGGATATAC-1
## 1 9 1
## hc51_AAACCTGAGGTCATCT-1 hc51_AAACCTGCACGGATAG-1
## 1 3
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
hms_cluster<- RunUMAP(hms_cluster, dims = 1:10)
## 08:00:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 08:00:27 Read 52158 rows and found 10 numeric columns
## 08:00:27 Using Annoy for neighbor search, n_neighbors = 30
## 08:00:27 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 08:00:34 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b1c654706
## 08:00:34 Searching Annoy index using 1 thread, search_k = 3000
## 08:00:52 Annoy recall = 100%
## 08:00:53 Commencing smooth kNN distance calibration using 1 thread
## 08:00:55 Initializing from normalized Laplacian + noise
## 08:00:57 Commencing optimization for 200 epochs, with 2146824 positive edges
## 08:01:17 Optimization finished
DimPlot(hms_cluster, reduction = "umap")

saveRDS(hms_cluster, file = "hms_cluster_test.rds")
#name each cluster id
new.cluster.ids <- c("Macrophage", "Macrophage", "Macrophage", "Macrophage", "Neutrophils", "Macrophage", "Naive_CD4_T", "NK", "Neutrophils", "Dendritic", "T", "T","Basal","Plasma","T")
names(new.cluster.ids) <- levels(hms_cluster)
hms_cluster_id<- RenameIdents(hms_cluster, new.cluster.ids)
DimPlot(hms_cluster_id, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

saveRDS(hms_cluster_id, file = "hms_cluster_id_test.rds")
#hms_cluster_id<-readRDS(file="hms_cluster_id_test.rds")
Macrophage<-subset(hms_cluster_id, idents=c('Macrophage'))
DimPlot(Macrophage, reduction = "umap")

saveRDS(Macrophage, file="Macrophage.rds")
Neutrophils<-subset(hms_cluster_id, idents=c('Neutrophils'))
DimPlot(Neutrophils, reduction = "umap")

saveRDS(Neutrophils, file="Neutrophils.rds")
Naive_CD4_T<-subset(hms_cluster_id, idents=c('Naive_CD4_T'))
DimPlot(Naive_CD4_T, reduction = "umap")

saveRDS(Naive_CD4_T, file="Naive_CD4_T.rds")
NK<-subset(hms_cluster_id, idents=c('NK'))
DimPlot(NK, reduction = "umap")

saveRDS(NK, file="NK.rds")
Dendritic<-subset(hms_cluster_id, idents=c('Dendritic'))
DimPlot(Dendritic, reduction = "umap")

saveRDS(Dendritic, file="Dendritic.rds")
Basal<-subset(hms_cluster_id, idents=c('Basal'))
DimPlot(Basal, reduction = "umap")

saveRDS(Basal, file="Basal.rds")
T<-subset(hms_cluster_id, idents=c('T'))
DimPlot(T, reduction = "umap")

saveRDS(T, file="T.rds")
Plasma<-subset(hms_cluster_id, idents=c('Plasma'))
DimPlot(Plasma, reduction = "umap")

saveRDS(Plasma, file="Plasma.rds")
#input each cluster
Basal<-readRDS("Basal.rds")
Dendritic<-readRDS("Dendritic.rds")
Macrophage<-readRDS("Macrophage.rds")
Naive_CD4_T<-readRDS("Naive_CD4_T.rds")
Neutrophils<-readRDS("Neutrophils.rds")
NK<-readRDS("NK.rds")
Plasma<-readRDS("Plasma.rds")
T<-readRDS("T.rds")
DimPlot(Basal, reduction = "umap", split.by = "tech")

DimPlot(Dendritic, reduction = "umap", split.by = "tech")

DimPlot(Macrophage, reduction = "umap", split.by = "tech")

DimPlot(Naive_CD4_T, reduction = "umap", split.by = "tech")

DimPlot(Neutrophils, reduction = "umap", split.by = "tech")

DimPlot(NK, reduction = "umap", split.by = "tech")

DimPlot(Plasma, reduction = "umap", split.by = "tech")

DimPlot(T, reduction = "umap", split.by = "tech")

markers.to.plot <- c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","ARRDC3","EREG","ARSE","MSP2K6","DHCR7","UCP2","SLC25A10","VIL1","MCM5","DHCR24","SLC9A3R1","PFN1","TPPP3","DEGS2","RAB26")
DoHeatmap(subset(Basal,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Basal, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Dendritic,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Dendritic, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Macrophage,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Macrophage, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Naive_CD4_T,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Naive_CD4_T, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Neutrophils,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Neutrophils, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(NK,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(NK, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Plasma,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Plasma, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(T,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(T, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

RidgePlot(Basal, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"),cols = c("green3","cornflowerblue","orangered"), group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00325
## Picking joint bandwidth of 0.0474
## Picking joint bandwidth of 0.117
## Picking joint bandwidth of 0.0865
## Picking joint bandwidth of 0.184
## Picking joint bandwidth of 0.17
## Picking joint bandwidth of 0.00184
## Picking joint bandwidth of 0.0186
## Picking joint bandwidth of 0.0215
## Picking joint bandwidth of 0.268
## Picking joint bandwidth of 0.0309
## Picking joint bandwidth of 0.153
## Picking joint bandwidth of 0.0523
## Picking joint bandwidth of 0.216

RidgePlot(Dendritic, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"),cols = c("green3","cornflowerblue","orangered"), group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00878
## Picking joint bandwidth of 0.0276
## Picking joint bandwidth of 0.0504
## Picking joint bandwidth of 0.0327
## Picking joint bandwidth of 0.0744
## Picking joint bandwidth of 0.185
## Picking joint bandwidth of 0.00438
## Picking joint bandwidth of 0.0285
## Picking joint bandwidth of 0.0113
## Picking joint bandwidth of 0.22
## Picking joint bandwidth of 0.0226
## Picking joint bandwidth of 0.139
## Picking joint bandwidth of 0.00564
## Picking joint bandwidth of 0.0065

RidgePlot(Macrophage, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.0032
## Picking joint bandwidth of 0.0547
## Picking joint bandwidth of 0.0595
## Picking joint bandwidth of 0.0232
## Picking joint bandwidth of 0.0672
## Picking joint bandwidth of 0.0975
## Picking joint bandwidth of 0.0025
## Picking joint bandwidth of 0.0244
## Picking joint bandwidth of 0.00684
## Picking joint bandwidth of 0.118
## Picking joint bandwidth of 0.027
## Picking joint bandwidth of 0.0945
## Picking joint bandwidth of 0.00494
## Picking joint bandwidth of 0.00208

RidgePlot(Naive_CD4_T, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.000519
## Picking joint bandwidth of 0.007
## Picking joint bandwidth of 0.137
## Picking joint bandwidth of 0.0131
## Picking joint bandwidth of 0.105
## Picking joint bandwidth of 0.151
## Picking joint bandwidth of 0.00046
## Picking joint bandwidth of 0.00276
## Picking joint bandwidth of 0.0178
## Picking joint bandwidth of 0.156
## Picking joint bandwidth of 0.003
## Picking joint bandwidth of 0.094
## Picking joint bandwidth of 0.00795
## Picking joint bandwidth of 0.00122

RidgePlot(Neutrophils, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00741
## Picking joint bandwidth of 0.125
## Picking joint bandwidth of 0.174
## Picking joint bandwidth of 0.099
## Picking joint bandwidth of 0.208
## Picking joint bandwidth of 0.296
## Picking joint bandwidth of 0.00974
## Picking joint bandwidth of 0.145
## Picking joint bandwidth of 0.0618
## Picking joint bandwidth of 0.241
## Picking joint bandwidth of 0.0378
## Picking joint bandwidth of 0.0676
## Picking joint bandwidth of 0.00527
## Picking joint bandwidth of 0.00183

RidgePlot(NK, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.000989
## Picking joint bandwidth of 0.00466
## Picking joint bandwidth of 0.127
## Picking joint bandwidth of 0.0269
## Picking joint bandwidth of 0.0874
## Picking joint bandwidth of 0.123
## Picking joint bandwidth of 0.00018
## Picking joint bandwidth of 0.00249
## Picking joint bandwidth of 0.00739
## Picking joint bandwidth of 0.144
## Picking joint bandwidth of 0.00219
## Picking joint bandwidth of 0.115
## Picking joint bandwidth of 0.0115
## Picking joint bandwidth of 0.00101

RidgePlot(Plasma, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00359
## Picking joint bandwidth of 0.016
## Picking joint bandwidth of 0.0848
## Picking joint bandwidth of 0.0223
## Picking joint bandwidth of 0.124
## Picking joint bandwidth of 0.171
## Picking joint bandwidth of 0.00139
## Picking joint bandwidth of 0.00849
## Picking joint bandwidth of 0.00974
## Picking joint bandwidth of 0.217
## Picking joint bandwidth of 0.00637
## Picking joint bandwidth of 0.162
## Picking joint bandwidth of 0.019
## Picking joint bandwidth of 0.0038

RidgePlot(T, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00406
## Picking joint bandwidth of 0.0337
## Picking joint bandwidth of 0.115
## Picking joint bandwidth of 0.036
## Picking joint bandwidth of 0.127
## Picking joint bandwidth of 0.159
## Picking joint bandwidth of 0.00745
## Picking joint bandwidth of 0.0229
## Picking joint bandwidth of 0.0128
## Picking joint bandwidth of 0.196
## Picking joint bandwidth of 0.0213
## Picking joint bandwidth of 0.139
## Picking joint bandwidth of 0.0111
## Picking joint bandwidth of 0.00282
